void BinghamRendererThread::run()
{
    const Matrix* vertices = tess::vertices( m_lod );
    int numVerts = tess::n_vertices( m_lod );

    Matrix base = ( FMath::sh_base( (*vertices), m_order ) );

    int numThreads = GLFunctions::idealThreadCount;

    double radius( 0.0 );
    QMatrix4x4 mvp = m_pMatrix * m_mvMatrix;
    if ( ( m_orient & 1 ) == 1 )
    {
        int glyphs = m_nx * m_ny;
        m_verts->reserve( numVerts * glyphs * 7 );
        QVector4D pos( 0, 0, m_z, 1.0 );
        for( int yy = m_id; yy < m_ny; yy += numThreads )
        {
            for ( int xx = 0; xx < m_nx; ++xx )
            {
                int dataPos = xx + yy * m_nx + m_zi * m_nx * m_ny;
                if ( ( fabs( m_data->at( dataPos )[8] ) > 0.0001 ) )
                {
                    float locX = xx * m_dx + m_ax;
                    float locY = yy * m_dy + m_ay;

                    pos.setX( locX );
                    pos.setY( locY );
                    QVector4D test = mvp * pos;

                    if ( fabs( test.x() / 2.0 ) < 1.0 && fabs( test.y() / 2.0 ) < 1.0 )
                    {
                        for ( int k = 0; k < 3; ++k )
                        {
                            if ( k == 0 && m_render1 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( m_z );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 1 && m_render2 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( m_z );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 2 && m_render3 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( m_z );
                                    m_verts->push_back( radius );
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    if ( ( m_orient & 2 ) == 2 )
    {
        int glyphs = m_nx * m_nz;
        m_verts->reserve( numVerts * glyphs * 7 );
        QVector4D pos( 0, m_y, 0, 1.0 );
        for( int zz = m_id; zz < m_nz; zz += numThreads )
        {
            for ( int xx = 0; xx < m_nx; ++xx )
            {
                int dataPos = xx + m_yi * m_nx + zz * m_nx * m_ny;
                if ( ( fabs( m_data->at( dataPos )[8] ) > 0.0001 ) )
                {
                    float locX = xx * m_dx + m_ax;
                    float locZ = zz * m_dz + m_az;

                    pos.setX( locX );
                    pos.setZ( locZ );
                    QVector4D test = mvp * pos;

                    if ( fabs( test.x() / 2.0 ) < 1.0 && fabs( test.y() / 2.0 ) < 1.0 )
                    {
                        for ( int k = 0; k < 3; ++k )
                        {
                            if ( k == 0 && m_render1 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( m_y );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 1 && m_render2 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( m_y );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 2 && m_render3 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( locX );
                                    m_verts->push_back( m_y );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    if ( ( m_orient & 4 )  == 4 )
    {
        int glyphs = m_ny * m_nz;
        m_verts->reserve( numVerts * glyphs * 10 );
        QVector4D pos( m_x, 0, 0, 1.0 );
        for( int yy = m_id; yy < m_ny; yy += numThreads )
        {
            for ( int zz = 0; zz < m_nz; ++zz )
            {
                int dataPos = m_xi + yy * m_nx + zz * m_nx * m_ny;
                if ( ( fabs( m_data->at( dataPos )[8] ) > 0.0001 ) )
                {
                    float locZ = zz * m_dz + m_az;
                    float locY = yy * m_dy + m_ay;

                    pos.setY( locY );
                    pos.setZ( locZ );
                    QVector4D test = mvp * pos;

                    if ( fabs( test.x() / 2.0 ) < 1.0 && fabs( test.y() / 2.0 ) < 1.0 )
                    {
                        for ( int k = 0; k < 3; ++k )
                        {
                            if ( k == 0 && m_render1 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( m_x );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 1 && m_render2 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( m_x );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                            if ( k == 2 && m_render3 )
                            {
                                for( int i = 0; i < numVerts; ++i )
                                {
                                    ColumnVector cur( 3 );
                                    cur( 1 ) = (*vertices)( i+1, 1 );
                                    cur( 2 ) = (*vertices)( i+1, 2 );
                                    cur( 3 ) = (*vertices)( i+1, 3 );

                                    ColumnVector m1( 3 );
                                    m1( 1 ) = m_data->at( dataPos )[k*9];
                                    m1( 2 ) = m_data->at( dataPos )[k*9+1];
                                    m1( 3 ) = m_data->at( dataPos )[k*9+2];

                                    ColumnVector m2( 3 );
                                    m2( 1 ) = m_data->at( dataPos )[k*9+3];
                                    m2( 2 ) = m_data->at( dataPos )[k*9+4];
                                    m2( 3 ) = m_data->at( dataPos )[k*9+5];

                                    double val_1( FMath::iprod( m1, cur ) );
                                    double val_2( FMath::iprod( m2, cur ) );


                                    double k1 = m_data->at( dataPos )[k*9+6];
                                    double k2 = m_data->at( dataPos )[k*9+7];
                                    double f0 = m_data->at( dataPos )[k*9+8];

                                    radius =  f0 * exp( -( k1 * val_1 * val_1 + k2 * val_2 * val_2 ) ) ;

                                    m_verts->push_back( (*vertices)( i+1, 1 ) );
                                    m_verts->push_back( (*vertices)( i+1, 2 ) );
                                    m_verts->push_back( (*vertices)( i+1, 3 ) );
                                    m_verts->push_back( m_x );
                                    m_verts->push_back( locY );
                                    m_verts->push_back( locZ );
                                    m_verts->push_back( radius );
                                }
                            }
                        }
                    }
                }
            }
        }
    }
    else
    {
        return;
    }
}
Beispiel #2
0
  void SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);

    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

    // Get default tentative prolongator factory
    // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
    // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
    RCP<const FactoryBase> initialPFact = GetFactory("P");
    if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }

    // Level Get
    RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());

    if(restrictionMode_) {
      SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
      A = Utils2::Transpose(A, true); // build transpose of A explicitely
    }

    //Build final prolongator
    RCP<Matrix> finalP; // output

    //FIXME Xpetra::Matrix should calculate/stash max eigenvalue
    //FIXME SC lambdaMax = A->GetDinvALambda();

    const ParameterList & pL = GetParameterList();
    Scalar dampingFactor = pL.get<Scalar>("Damping factor");
    if (dampingFactor != Teuchos::ScalarTraits<Scalar>::zero()) {

      //Teuchos::ParameterList matrixList;
      //RCP<Matrix> I = MueLu::Gallery::CreateCrsMatrix<SC, LO, GO, Map, CrsMatrixWrap>("Identity", Get< RCP<Matrix> >(fineLevel, "A")->getRowMap(), matrixList);
      //RCP<Matrix> newPtent = Utils::TwoMatrixMultiply(I, false, Ptent, false);
      //Ptent = newPtent; //I tried a checkout of the original Ptent, and it seems to be gone now (which is good)

      RCP<Matrix> AP;
      {
        SubFactoryMonitor m2(*this, "MxM: A x Ptentative", coarseLevel);
        //JJH -- If I switch doFillComplete to false, the resulting matrix seems weird when printed with describe.
        //JJH -- The final prolongator is wrong, to boot.  So right now, I fillComplete AP, but avoid fillComplete
        //JJH -- in the scaling.  Long story short, we're doing 2 fillCompletes, where ideally we'd do just one.
        bool doFillComplete=true;

        bool optimizeStorage=true;

        // FIXME: ADD() need B.getProfileType()==DynamicProfile
        if (A->getRowMap()->lib() == Xpetra::UseTpetra) {
          optimizeStorage=false;
        }

        bool allowMLMultiply = true;
#ifdef HAVE_MUELU_EXPERIMENTAL
        // Energy minimization uses AP pattern for restriction. The problem with ML multiply is that it automatically
        // removes zero valued entries in the matrix product, resulting in incorrect pattern for the minimization.
        // One could try to mitigate that by multiply matrices with all entries equal to zero, which would produce the
        // correct graph. However, that is one extra MxM we don't need.
        // Instead, I disable ML multiply when experimental option is specified.
        // NOTE: Thanks to C.Siefert, native EPetra MxM multiply version should actually be comparable with ML in time
        allowMLMultiply      = false;
#endif

        AP = Utils::Multiply(*A, false, *Ptent, false, doFillComplete, optimizeStorage, allowMLMultiply);
      }

      {
        SubFactoryMonitor m2(*this, "Scaling (A x Ptentative) by D^{-1}", coarseLevel);
        bool doFillComplete=true;
        bool optimizeStorage=false;
        Teuchos::ArrayRCP<SC> diag = Utils::GetMatrixDiagonal(*A);
        Utils::MyOldScaleMatrix(AP, diag, true, doFillComplete, optimizeStorage); //scale matrix with reciprocal of diag
      }

      Scalar lambdaMax;
      {
        SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
        lambdaMax = A->GetMaxEigenvalueEstimate();
        if (lambdaMax == -Teuchos::ScalarTraits<SC>::one()) {
          GetOStream(Statistics1, 0) << "Calculating max eigenvalue estimate now" << std::endl;
          Magnitude stopTol = 1e-4;
          lambdaMax = Utils::PowerMethod(*A, true, (LO) 10, stopTol);
          A->SetMaxEigenvalueEstimate(lambdaMax);
        } else {
          GetOStream(Statistics1, 0) << "Using cached max eigenvalue estimate" << std::endl;
        }
        GetOStream(Statistics1, 0) << "Damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
      }

      {
        SubFactoryMonitor m2(*this, "M+M: P = (Ptentative) + (D^{-1} x A x Ptentative)", coarseLevel);

        bool doTranspose=false;
        bool PtentHasFixedNnzPerRow=true;
        Utils2::TwoMatrixAdd(Ptent, doTranspose, Teuchos::ScalarTraits<Scalar>::one(), AP, doTranspose, -dampingFactor/lambdaMax, finalP, PtentHasFixedNnzPerRow);
      }

      {
        SubFactoryMonitor m2(*this, "FillComplete() of P", coarseLevel);
        finalP->fillComplete( Ptent->getDomainMap(), Ptent->getRangeMap() );
      }

    } else {
      finalP = Ptent;
    }

    // Level Set
    if (!restrictionMode_) {
      // prolongation factory is in prolongation mode
      Set(coarseLevel, "P", finalP);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        finalP->CreateView("stridedMaps", Ptent);

    } else {
      // prolongation factory is in restriction mode
      RCP<Matrix> R = Utils2::Transpose(finalP, true); // use Utils2 -> specialization for double
      Set(coarseLevel, "R", R);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        R->CreateView("stridedMaps", Ptent, true);
    }

    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics0,0) << Utils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);

  } //Build()
/******************************************************************************
One forward-backward pass through a minimum-duration HMM model with a
single Gaussian in each of the states. T: totalFeatures
*******************************************************************************/
double ESHMM::mdHMMLogForwardBackward(ESHMM *mdHMM, VECTOR_OF_F_VECTORS *features, double **post, int T, mat &gamma, rowvec &gamma1,
			     mat &sumxi){
  printf("forward backward algorithm calculation in progress...\n");
  int i = 0, j = 0 , k = 0;
  /* total no of states is much larger, instead of number of pdfs we have to extend 
   states by Min_DUR, therefore total states = Q * MD */
  int Q = mdHMM->hmmStates;
  int Qmd = Q * MIN_DUR;
  mat logalpha(T, Qmd); // forward probability matrix
  mat logbeta(T, Qmd); // backward probability matrix
  mat logg(T, Qmd);  // loggamma 
  mat m(Q, 1);
  mat logA(Qmd, Qmd);  /// transition matrix is already in logarithm
  mat new_logp(T, Qmd); // after replication for each substates
  mat logp_k(Q, T);  // we have single cluster only, probability of each feature corresponding to each cluster
  
  printf("Q: %d  Qmd: %d\n", Q, Qmd);
  for(i = 0; i < Qmd; i++){
    for(j = 0; j < Qmd; j++){
      logA(i, j) = mdHMM->trans[i]->array[j];
    }
  }
  
  // minimum duration viterbi hence modify B(posterior) prob matrix
  
  for(i = 0; i < Q; i++)
    for(j = 0; j < T; j++){
      logp_k(i, j) = post[i][j];
    }
  
  for(i = 0; i < Q; i++){
    m(i, 0) = 1;
    for(j = 0; j < T; j++)
      logp_k(i, j) = 0.0; // since  we have only one cluster so cluster probability and 
    // total probability is same. Hence subtracting cluster probability from total probability would make it zero.
  }
  
  // modifying logp matrix according to minimum duration
  for(i = 0; i < Q; i++){
    for(j = 0; j < T; j++){
      for(k = i*MIN_DUR; k < (i+1)*MIN_DUR; k++){
	new_logp(j, k) = post[i][j];
      }
    }
  }
  /* forward initialization */
  // for summing log probabilties, first sum probs and then take logarithm
  printf("forward initialization...\n\n");
  for(i = 0; i < Qmd; i++){
    logalpha(0, i) = mdHMM->prior->array[i] + new_logp(0, i) ;
  }
  ///print logalpha after initialization
  for(i = 0; i < Qmd; i++)
    printf("%lf ", logalpha(0, i));
  
  /* forward induction */
  printf("forward induction in progress...\n");
  int t = 0;  
  
  mpfr_t summation3;
  mpfr_init(summation3);
  mpfr_t var11, var21;
  mpfr_init(var11);
  mpfr_init(var21);
  mpfr_set_d(var11, 0.0, MPFR_RNDN);
  mpfr_set_d(var21, 0.0, MPFR_RNDN);
  mpfr_set_d(summation3, 0.0, MPFR_RNDN);
  
  for(t = 1; t < T; t++){
    //printf("%d ", t);
    for(j = 0; j < Qmd; j++){
      vec v1(Qmd), v2(Qmd);      vec v3(Qmd);  
      //first find logalpha vector
      for(i = 0; i < Qmd; i++)
	v1(i) = logalpha(t-1, i);
      // if(t < 20)
      // 	v1.print("v1:\n");
      
      // extract transition probability vector
      for(i = 0; i < Qmd; i++)
	v2(i) = logA(i, j);
      // if(t < 20)
      // 	v2.print("v2:\n");
      // Now sum both the vectors into one
      for(i = 0; i < Qmd; i++)
	v3(i) = v1(i) + v2(i);
            
      double *temp = (double *)calloc(Qmd, sizeof(double ));
      for(i = 0; i < Qmd; i++)
	temp[i] = v3(i);
      // if(t < 20)
      // 	v3.print("v3:\n");
      //printf("printed\n");
      // now sum over whole column vector      
      mpfr_set_d(summation3, 0.0, MPFR_RNDN);
      // take the exponentiation and summation in one loop 
      
      // getting double from mpfr variable
      /// double mpfr_get_d(mpfr_t op, mpfr_rnd_t rnd);      
      //mpfr_set_d(var1, 0.0, MPFR_RNDD);
      //mpfr_set_d(var2, 0.0, MPFR_RNDD);
      // now take the exponentiation
      for(i = 0; i < Qmd; i++){
	double elem = temp[i];
	mpfr_set_d(var21, elem, MPFR_RNDD);
	//mpfr_printf("var2: %lf\n", var21);
	mpfr_exp(var11, var21, MPFR_RNDD); ///take exp(v2) and store in v1 
	// take sum of all elements in total  
	mpfr_add(summation3, summation3, var11, MPFR_RNDD); // add summation and v1
      }
      
      // now take the logarithm of sum 
      mpfr_log(summation3, summation3, MPFR_RNDD);
      // now convert this sum to double
      double sum2 = mpfr_get_d(summation3, MPFR_RNDD);
      // now assign this double to logalpha
      
      // now add logp(t, j)      
      sum2 += new_logp(t, j);
      // if(t < 20)
      // 	printf("sum: %lf\n", sum2);
      logalpha(t, j) = sum2;
      /// clear mpfr variables      
    }
    if(t < 20){
      printf("logalpha:\n");
      for(j = 0; j < Qmd; j++)
	printf("%lf ", logalpha(t, j));
      printf("\n");
    }
  }  // close the forward induction loop   
  mpfr_clear(var11);
  mpfr_clear(var21);
  mpfr_clear(summation3);
  
  /* forward termination */
  double ll = 0; // total log likelihood of all observation given this HMM
  for(i = 0; i < Qmd; i++){
    ll += logalpha(T-1, i);
  }
  ///===================================================================
  // for(i = 0; i < 100; i++){
  //   for(j = 0; j < Qmd; j++)
  //     printf("%lf ", logalpha(i, j));
  //   printf("\n");
  // }
  printf("\nprinting last column of logalpha...\n");
  for(i = 1; i < 6; i++){
    for(j = 0; j < Qmd; j++)
      printf("%lf ", logalpha(T-i, j));
    printf("\n");
  }
  printf("total loglikelihood: %lf\n", ll);
  ///===================================================================  
  double sum = 0;
  /* calculate logalpha last row sum */  
  for(i = 0; i < Qmd; i++)
    sum += logalpha(T-1, i);
  ll = sum;
  printf("LL: %lf........\n", ll);
  
  /* backward initilization */
  /// intialize mpfr variables  
  mpfr_t summation;
  mpfr_init(summation);
  mpfr_t var1, var2;
  mpfr_init(var1);
  mpfr_init(var2);
  mpfr_set_d(summation, 0.0, MPFR_RNDN);
  mpfr_set_d(var1, 0.0, MPFR_RNDN);
  mpfr_set_d(var2, 0.0, MPFR_RNDN);
  
  printf("backward initialization...\n");
  mpfr_set_d(summation, 0.0, MPFR_RNDN);
  double *temp = (double *)calloc(Qmd, sizeof(double ));
  
  for(i = 0; i < Qmd; i++)
    temp[i] = logalpha(T-1, i);
  
  for(i = 0; i < Qmd; i++){
    //double elem = logalpha(T-1, i);
    double elem = temp[i-1];
    mpfr_set_d(var2, elem, MPFR_RNDN);
    mpfr_exp(var1, var2, MPFR_RNDN);
    mpfr_add(summation, summation, var1, MPFR_RNDN);
  }
  // take logarithm
  mpfr_log(summation, summation, MPFR_RNDN);
  double sum2 = mpfr_get_d(summation, MPFR_RNDN);
  for(i = 0; i < Qmd; i++){    
    logg(T-1, i) = logalpha(T-1, i) - sum2 ;
  }

  // gamma matrix
  for(j = 0; j < Q; j++){
    gamma(j, T-1) = exp(logp_k(j, T-1) + logg(T-1, j));
  }
  mat lognewxi(Qmd, Qmd); // declare lognewxi matrix 
  /* backward induction */
  printf("backward induction in progress...\n");
  for(t = T-2; t >= 0 ; t--){
    for(j = 0; j < Qmd; j++){
      vec v1(Qmd);
      vec v2(Qmd);
      vec v3(Qmd);
      sum = 0;
      for(i = 0; i < Qmd; i++)
	v1(i) = logA(j, i);
      for(i = 0; i < Qmd; i++)
	v2(i) = logbeta(t+1, i);
      for(i = 0; i < Qmd; i++)
	v3(i) = new_logp(t+1, i);
      // add all three vectors
      for(i = 0; i < Qmd; i++)
	v1(i) += v2(i) + v3(i);
      mpfr_set_d(summation, 0.0, MPFR_RNDN);
      for(i = 0; i < Qmd; i++){
	double elem = v1(i);
	mpfr_set_d(var2, elem, MPFR_RNDN);
	mpfr_exp(var1, var2, MPFR_RNDN);
	mpfr_add(summation, summation, var1, MPFR_RNDN);
      }      
      mpfr_log(summation, summation, MPFR_RNDN);
      sum2 = mpfr_get_d(summation, MPFR_RNDN);
      logbeta(t, j) = sum2;
    }

    // computation of log(gamma) is now possible called logg here
    for(i = 0; i < Qmd; i++){
      logg(t, i) = logalpha(t, i) + logbeta(t, i);
    }
    mpfr_set_d(summation, 0.0, MPFR_RNDN);
    for(i = 0; i < Qmd; i++){
      double elem = logg(t, i);
      mpfr_set_d(var2, elem, MPFR_RNDN);
      mpfr_exp(var1, var2, MPFR_RNDN);
      mpfr_add(summation, summation, var1, MPFR_RNDN);
    }
    mpfr_log(summation, summation, MPFR_RNDN);
    sum2 = mpfr_get_d(summation, MPFR_RNDN);
    for(i = 0; i < Qmd; i++)
      logg(t, i) = logg(t, i) - sum2;
    
    // finally the gamma_k is computed (called gamma here )
    mpfr_set_d(summation, 0.0, MPFR_RNDN);
    for(j = 0; j < Q; j++){
      // for(i = j*MIN_DUR; i < (j+1) * MIN_DUR; i++){
      // 	sum += exp(logg(t, i));
      // }
      gamma(j, t) = exp( logp_k(j, t) + logg(t, j) );
    }    
    /* for the EM algorithm we need the sum
       over xi all over t */
    // replicate logalpha(t, :)' matrix along columns
    mat m1(Qmd, Qmd);
    for(i = 0; i < Qmd; i++){
      for(j = 0; j < Qmd; j++){
	m1(i, j) = logalpha(t, i);
      }
    }
    // replicate logbeta matrix
    vec v1(Qmd);
    for(i = 0; i < Qmd; i++)
      v1(i) = logbeta(t+1, i);
    vec v2(Qmd);
    for(i = 0; i < Qmd; i++)
      v2(i) = new_logp(t+1, i);
    vec v3(Qmd);
    for(i = 0; i < Qmd; i++)
      v3(i) = v1(i) + v2(i);
    // replicate v3 row vector along all rows of matrix m2
    mat m2(Qmd, Qmd);
    for(i = 0; i < Qmd; i++){
      for(j = 0; j < Qmd; j++){
	m2(i, j) = v3(i);
      }
    }
    
    // add both matrices m1 and m2 
    mat m3(Qmd, Qmd);
    m3 = m1 + m2;  // can do direct addition   
    ///mat lognewxi(Qmd, Qmd); // declare lognewxi matrix 
    lognewxi.zeros();
    lognewxi = m3 + logA;
    // add new sum to older sumxi
    /// first subtract total sum from lognewxi
    mpfr_set_d(summation, 0.0, MPFR_RNDN);
    for(i = 0; i < Qmd; i++){
      for(j = 0; j < Qmd; j++){
	double elem = lognewxi(i, j);
	mpfr_set_d(var2, elem, MPFR_RNDN);
	mpfr_exp(var1, var2, MPFR_RNDN);
	mpfr_add(summation, summation, var1, MPFR_RNDN);	
	//sum += exp(lognewxi(i, j));
      }
    }
    // now take the logarithm of sum
    mpfr_log(summation, summation, MPFR_RNDN);
    sum2 = mpfr_get_d(summation, MPFR_RNDN);
    // subtract sum from lognewxi
    for(i = 0; i < Qmd; i++){
      for(j = 0; j < Qmd; j++){
	lognewxi(i, j) = lognewxi(i, j) - sum2;
      }
    }
    mat newxi(Qmd, Qmd);
    newxi = lognewxi;
    // add sumxi and newlogsumxi
    /// take exponential of each element
    for(i = 0; i < Qmd; i++){
      for(j = 0; j < Qmd; j++){
	newxi(i, j) = exp(newxi(i, j));
      }
    }
    sumxi = sumxi + newxi;
  } // close the backward induction loop 
  
  /* handle annoying numerics */
  /// calculate sum of lognewxi along each row (lognewxi is already modified in our case)
  
  for(i = 0; i < Qmd; i++){
    mpfr_set_d(summation, 0.0, MPFR_RNDN);
    for(j = 0; j < Qmd; j++){
      //sum += lognewxi(i, j);
      double elem = lognewxi(i, j);
      mpfr_set_d(var2, elem, MPFR_RNDN);
      mpfr_exp(var1, var2, MPFR_RNDN);
      mpfr_add(summation, summation, var1, MPFR_RNDN);
    }
    sum2 = mpfr_get_d(summation, MPFR_RNDN);
    gamma1(i) = sum2;
  }
  // normalize gamma1 which is prior and normalize sumxi which is transition matrix
  sum = 0;
  for(i = 0; i < Qmd; i++)
    sum += gamma1(i);
  for(i = 0; i < Qmd; i++)
    gamma1(i) /= sum;  
  // transition probability matrix will be normalized in train_hmm function
  
  /// clear mpfr variables
  mpfr_clear(summation);
  mpfr_clear(var1);
  mpfr_clear(var2);
  printf("forward-backward algorithm calculation is done...\n");
  /* finished forward-backward algorithm */    
  return ll;
}
Beispiel #4
0
bool ADTFile::init(uint32 map_num, uint32 tileX, uint32 tileY)
{
    Log sLog;

    if(ADT.isEof ())
        return false;

    uint32 size;

    string xMap;
    string yMap;

    Adtfilename.erase(Adtfilename.find(".adt"),4);
    string TempMapNumber;
    TempMapNumber = Adtfilename.substr(Adtfilename.length()-6,6);
    xMap = TempMapNumber.substr(TempMapNumber.find("_")+1,(TempMapNumber.find_last_of("_")-1) - (TempMapNumber.find("_")));
    yMap = TempMapNumber.substr(TempMapNumber.find_last_of("_")+1,(TempMapNumber.length()) - (TempMapNumber.find_last_of("_")));
    Adtfilename.erase((Adtfilename.length()-xMap.length()-yMap.length()-2), (xMap.length()+yMap.length()+2));
    string AdtMapNumber = xMap + ' ' + yMap + ' ' + GetPlainName((char*)Adtfilename.c_str());
    //sLog.Write("Processing map %s...", AdtMapNumber.c_str());
    //sLog.Write("MapNumber = %s", TempMapNumber.c_str());
    //sLog.Write("xMap = %s", xMap.c_str());
    //sLog.Write("yMap = %s", yMap.c_str());

    std::string dirname = std::string(szWorkDirWmo) + "/dir_bin";
    FILE *dirfile;
    dirfile = fopen(dirname.c_str(), "ab");
    if(!dirfile)
    {
        sLog.Write("Can't open dirfile!'%s'", dirname.c_str());
        return false;
    }

    while (!ADT.isEof())
    {
        char fourcc[5];
        ADT.read(&fourcc,4);
        ADT.read(&size, 4);
        flipcc(fourcc);
        fourcc[4] = 0;

        size_t nextpos = ADT.getPos() + size;

        if (!strcmp(fourcc,"MCIN"))
        {
        }
        else if (!strcmp(fourcc,"MTEX"))
        {
        }
        else if (!strcmp(fourcc,"MMDX"))
        {
            if (size)
            {
                char *buf = new char[size];
                ADT.read(buf, size);
                char *p=buf;
                int t=0;
                ModelInstansName = new string[size];
                while (p<buf+size)
                {
                    fixnamen(p,strlen(p));
                    string path(p);
                    char* s=GetPlainName(p);
                    fixname2(s,strlen(s));
                    p=p+strlen(p)+1;
                    ModelInstansName[t++] = s;

                    // < 3.1.0 ADT MMDX section store filename.mdx filenames for corresponded .m2 file
                    std::string ext3 = path.size() >= 4 ? path.substr(path.size()-4,4) : "";
                    std::transform( ext3.begin(), ext3.end(), ext3.begin(), ::tolower );
                    if(ext3 == ".mdx")
                    {
                        // replace .mdx -> .m2
                        path.erase(path.length()-2,2);
                        path.append("2");
                    }
                    // >= 3.1.0 ADT MMDX section store filename.m2 filenames for corresponded .m2 file
                    // nothing do

                    char szLocalFile[1024];
                    snprintf(szLocalFile, 1024, "%s/%s", szWorkDirWmo, s);
                    FILE * output = fopen(szLocalFile,"rb");
                    if(!output)
                    {
                        Model m2(path);
                        if(m2.open())
                            m2.ConvertToVMAPModel(szLocalFile);
                    }
                    else
                        fclose(output);
                }
                delete[] buf;
            }
        }
        else if (!strcmp(fourcc,"MWMO"))
        {
            if (size)
            {
                char *buf = new char[size];
                ADT.read(buf, size);
                char *p=buf;
                int q = 0;
                WmoInstansName = new string[size];
                while (p<buf+size)
                {
                    string path(p);
                    char* s=GetPlainName(p);
                    fixnamen(s,strlen(s));
                    fixname2(s,strlen(s));
                    p=p+strlen(p)+1;
                    WmoInstansName[q++] = s;
                }
                delete[] buf;
            }
        }
        //======================
        else if (!strcmp(fourcc,"MDDF"))
        {
            if (size)
            {
                nMDX = (int)size / 36;
                for (int i=0; i<nMDX; ++i)
                {
                    uint32 id;
                    ADT.read(&id, 4);
                    ModelInstance inst(ADT,ModelInstansName[id].c_str(), map_num, tileX, tileY, dirfile);
                }
                delete[] ModelInstansName;
            }
        }
        else if (!strcmp(fourcc,"MODF"))
        {
            if (size)
            {
                nWMO = (int)size / 64;
                for (int i=0; i<nWMO; ++i)
                {
                    uint32 id;
                    ADT.read(&id, 4);
                    WMOInstance inst(ADT,WmoInstansName[id].c_str(), map_num, tileX, tileY, dirfile);
                }
                delete[] WmoInstansName;
            }
        }
        //======================
        ADT.seek(nextpos);
    }
    ADT.close();
    fclose(dirfile);
    return true;
}
Beispiel #5
0
void KinZfitter::MakeModel(/*RooWorkspace &w,*/ KinZfitter::FitInput &input, KinZfitter::FitOutput &output) {

     //lep
     RooRealVar pTRECO1_lep("pTRECO1_lep", "pTRECO1_lep", input.pTRECO1_lep, 5, 500);
     RooRealVar pTRECO2_lep("pTRECO2_lep", "pTRECO2_lep", input.pTRECO2_lep, 5, 500);
     RooRealVar pTMean1_lep("pTMean1_lep", "pTMean1_lep", 
                            input.pTRECO1_lep, max(5.0, input.pTRECO1_lep-2*input.pTErr1_lep), input.pTRECO1_lep+2*input.pTErr1_lep);
     RooRealVar pTMean2_lep("pTMean2_lep", "pTMean2_lep", 
                            input.pTRECO2_lep, max(5.0, input.pTRECO2_lep-2*input.pTErr2_lep), input.pTRECO2_lep+2*input.pTErr2_lep);
     RooRealVar pTSigma1_lep("pTSigma1_lep", "pTSigma1_lep", input.pTErr1_lep);
     RooRealVar pTSigma2_lep("pTSigma2_lep", "pTSigma2_lep", input.pTErr2_lep);
     RooRealVar theta1_lep("theta1_lep", "theta1_lep", input.theta1_lep);
     RooRealVar theta2_lep("theta2_lep", "theta2_lep", input.theta2_lep);
     RooRealVar phi1_lep("phi1_lep", "phi1_lep", input.phi1_lep);
     RooRealVar phi2_lep("phi2_lep", "phi2_lep", input.phi2_lep);
     RooRealVar m1("m1", "m1", input.m1);
     RooRealVar m2("m2", "m2", input.m2);

     //gamma
     RooRealVar pTRECO1_gamma("pTRECO1_gamma", "pTRECO1_gamma", input.pTRECO1_gamma, 5, 500);
     RooRealVar pTRECO2_gamma("pTRECO2_gamma", "pTRECO2_gamma", input.pTRECO2_gamma, 5, 500);
     RooRealVar pTMean1_gamma("pTMean1_gamma", "pTMean1_gamma", 
                              input.pTRECO1_gamma, max(0.5, input.pTRECO1_gamma-2*input.pTErr1_gamma), input.pTRECO1_gamma+2*input.pTErr1_gamma);
     RooRealVar pTMean2_gamma("pTMean2_gamma", "pTMean2_gamma", 
                              input.pTRECO2_gamma, max(0.5, input.pTRECO2_gamma-2*input.pTErr2_gamma), input.pTRECO2_gamma+2*input.pTErr2_gamma);
     RooRealVar pTSigma1_gamma("pTSigma1_gamma", "pTSigma1_gamma", input.pTErr1_gamma);
     RooRealVar pTSigma2_gamma("pTSigma2_gamma", "pTSigma2_gamma", input.pTErr2_gamma);
     RooRealVar theta1_gamma("theta1_gamma", "theta1_gamma", input.theta1_gamma);
     RooRealVar theta2_gamma("theta2_gamma", "theta2_gamma", input.theta2_gamma);
     RooRealVar phi1_gamma("phi1_gamma", "phi1_gamma", input.phi1_gamma);
     RooRealVar phi2_gamma("phi2_gamma", "phi2_gamma", input.phi2_gamma);

     //gauss
     RooGaussian gauss1_lep("gauss1_lep", "gauss1_lep", pTRECO1_lep, pTMean1_lep, pTSigma1_lep);
     RooGaussian gauss2_lep("gauss2_lep", "gauss2_lep", pTRECO2_lep, pTMean2_lep, pTSigma2_lep);
     RooGaussian gauss1_gamma("gauss1_gamma", "gauss1_gamma", pTRECO1_gamma, pTMean1_gamma, pTSigma1_gamma);
     RooGaussian gauss2_gamma("gauss2_gamma", "gauss2_gamma", pTRECO2_gamma, pTMean2_gamma, pTSigma2_gamma);


     TString makeE_lep = "TMath::Sqrt((@0*@0)/((TMath::Sin(@1))*(TMath::Sin(@1)))[email protected]*@2)";
     RooFormulaVar E1_lep("E1_lep", makeE_lep, RooArgList(pTMean1_lep, theta1_lep, m1));  //w.import(E1_lep);
     RooFormulaVar E2_lep("E2_lep", makeE_lep, RooArgList(pTMean2_lep, theta2_lep, m2));  //w.import(E2_lep);

     TString makeE_gamma = "TMath::Sqrt((@0*@0)/((TMath::Sin(@1))*(TMath::Sin(@1))))";
     RooFormulaVar E1_gamma("E1_gamma", makeE_gamma, RooArgList(pTMean1_gamma, theta1_gamma));  //w.import(E1_gamma);
     RooFormulaVar E2_gamma("E2_gamma", makeE_gamma, RooArgList(pTMean2_gamma, theta2_gamma));  //w.import(E2_gamma);

     //dotProduct 3d
     TString dotProduct_3d = "@0*@1*( ((TMath::Cos(@2))*(TMath::Cos(@3)))/((TMath::Sin(@2))*(TMath::Sin(@3)))+(TMath::Cos(@[email protected])))";
     RooFormulaVar p1v3D2("p1v3D2", dotProduct_3d, RooArgList(pTMean1_lep, pTMean2_lep, theta1_lep, theta2_lep, phi1_lep, phi2_lep));
     RooFormulaVar p1v3Dph1("p1v3Dph1", dotProduct_3d, RooArgList(pTMean1_lep, pTMean1_gamma, theta1_lep, theta1_gamma, phi1_lep, phi1_gamma));
     RooFormulaVar p2v3Dph1("p2v3Dph1", dotProduct_3d, RooArgList(pTMean2_lep, pTMean1_gamma, theta2_lep, theta1_gamma, phi2_lep, phi1_gamma));
     RooFormulaVar p1v3Dph2("p1v3Dph2", dotProduct_3d, RooArgList(pTMean1_lep, pTMean2_gamma, theta1_lep, theta2_gamma, phi1_lep, phi2_gamma));
     RooFormulaVar p2v3Dph2("p2v3Dph2", dotProduct_3d, RooArgList(pTMean2_lep, pTMean2_gamma, theta2_lep, theta2_gamma, phi2_lep, phi2_gamma));
     RooFormulaVar ph1v3Dph2("ph1v3Dph2", dotProduct_3d, RooArgList(pTMean1_gamma, pTMean2_gamma, theta1_gamma, theta2_gamma, phi1_gamma, phi2_gamma));

     TString dotProduct_4d = "@0*@[email protected]";
     RooFormulaVar p1D2("p1D2", dotProduct_4d, RooArgList(E1_lep, E2_lep, p1v3D2));  //w.import(p1D2);
     RooFormulaVar p1Dph1("p1Dph1", dotProduct_4d, RooArgList(E1_lep, E1_gamma, p1v3Dph1));//  w.import(p1Dph1);
     RooFormulaVar p2Dph1("p2Dph1", dotProduct_4d, RooArgList(E2_lep, E1_gamma, p2v3Dph1)); // w.import(p2Dph1);
     RooFormulaVar p1Dph2("p1Dph2", dotProduct_4d, RooArgList(E1_lep, E2_gamma, p1v3Dph2));  //w.import(p1Dph2);
     RooFormulaVar p2Dph2("p2Dph2", dotProduct_4d, RooArgList(E2_lep, E2_gamma, p2v3Dph2));  //w.import(p2Dph2);
     RooFormulaVar ph1Dph2("ph1Dph2", dotProduct_4d, RooArgList(E1_gamma, E2_gamma, ph1v3Dph2)); // w.import(ph1Dph2);

     RooRealVar bwMean("bwMean", "m_{Z^{0}}", 91.187); //w.import(bwMean);
     RooRealVar bwGamma("bwGamma", "#Gamma", 2.5); 


     RooProdPdf* PDFRelBW;  
     RooFormulaVar* mZ;
     RooGenericPdf* RelBW;

     //mZ
     mZ = new RooFormulaVar("mZ", "TMath::Sqrt(2*@[email protected]*@[email protected]*@2)", RooArgList(p1D2, m1, m2));
     RelBW = new RooGenericPdf("RelBW","1/( pow(mZ*mZ-bwMean*bwMean,2)+pow(mZ,4)*pow(bwGamma/bwMean,2) )", RooArgSet(*mZ,bwMean,bwGamma) );
     PDFRelBW = new RooProdPdf("PDFRelBW", "PDFRelBW", RooArgList(gauss1_lep, gauss2_lep, *RelBW));     

     if (input.nFsr == 1) {

        mZ = new RooFormulaVar("mZ", "TMath::Sqrt(2*@0+2*@1+2*@[email protected]*@[email protected]*@4)", RooArgList(p1D2, p1Dph1, p2Dph1, m1, m2));
        RelBW = new RooGenericPdf("RelBW","1/( pow(mZ*mZ-bwMean*bwMean,2)+pow(mZ,4)*pow(bwGamma/bwMean,2) )", RooArgSet(*mZ,bwMean,bwGamma) );
//        PDFRelBW = new RooProdPdf("PDFRelBW", "PDFRelBW", RooArgList(gauss1_lep, gauss2_lep, gauss1_gamma, *RelBW));

        } 

     if (input.nFsr == 2) {

        mZ = new RooFormulaVar("mZ", "TMath::Sqrt(2*@0+2*@1+2*@2+2*@3+2*@4+2*@[email protected]*@[email protected]*@7)", RooArgList(p1D2,p1Dph1,p2Dph1,p1Dph2,p2Dph2,ph1Dph2, m1, m2));
        RelBW = new RooGenericPdf("RelBW","1/( pow(mZ*mZ-bwMean*bwMean,2)+pow(mZ,4)*pow(bwGamma/bwMean,2) )", RooArgSet(*mZ,bwMean,bwGamma) );
//        PDFRelBW = new RooProdPdf("PDFRelBW", "PDFRelBW", RooArgList(gauss1_lep, gauss2_lep, gauss1_gamma, gauss2_gamma, *RelBW));

        }

     //true shape
     RooRealVar sg("sg", "sg", sgVal_);
     RooRealVar a("a", "a", aVal_);
     RooRealVar n("n", "n", nVal_);

     RooCBShape CB("CB","CB",*mZ,bwMean,sg,a,n);
     RooRealVar f("f","f", fVal_);

     RooRealVar mean("mean","mean",meanVal_);
     RooRealVar sigma("sigma","sigma",sigmaVal_);
     RooRealVar f1("f1","f1",f1Val_);

     RooAddPdf *RelBWxCB;
     RelBWxCB = new RooAddPdf("RelBWxCB","RelBWxCB", *RelBW, CB, f);
     RooGaussian *gauss;
     gauss = new RooGaussian("gauss","gauss",*mZ,mean,sigma);
     RooAddPdf *RelBWxCBxgauss;
     RelBWxCBxgauss = new RooAddPdf("RelBWxCBxgauss","RelBWxCBxgauss", *RelBWxCB, *gauss, f1);

     RooProdPdf *PDFRelBWxCBxgauss;
     PDFRelBWxCBxgauss = new RooProdPdf("PDFRelBWxCBxgauss","PDFRelBWxCBxgauss", 
                                     RooArgList(gauss1_lep, gauss2_lep, *RelBWxCBxgauss) );


    //make fit
    RooArgSet *rastmp;
    rastmp = new RooArgSet(pTRECO1_lep, pTRECO2_lep);
/*
    if(input.nFsr == 1) {
      rastmp = new RooArgSet(pTRECO1_lep, pTRECO2_lep, pTRECO1_gamma);
      }

    if(input.nFsr == 2) {
      rastmp = new RooArgSet(pTRECO1_lep, pTRECO2_lep, pTRECO1_gamma, pTRECO2_gamma);
      }
*/
    RooDataSet* pTs = new RooDataSet("pTs","pTs", *rastmp);
    pTs->add(*rastmp);

    RooFitResult* r;
    if (mass4lRECO_ > 140) {

       r = PDFRelBW->fitTo(*pTs,RooFit::Save(),RooFit::PrintLevel(-1));

       } else {

              r = PDFRelBWxCBxgauss->fitTo(*pTs,RooFit::Save(),RooFit::PrintLevel(-1));

              }
    //save fit result
    const TMatrixDSym& covMatrix = r->covarianceMatrix();
    const RooArgList& finalPars = r->floatParsFinal();

    for (int i=0 ; i<finalPars.getSize(); i++){
 
        TString name = TString(((RooRealVar*)finalPars.at(i))->GetName());
        if(debug_) cout<<"name list of RooRealVar for covariance matrix "<<name<<endl;

    }

    int size = covMatrix.GetNcols();
    output.covMatrixZ.ResizeTo(size,size);
    output.covMatrixZ = covMatrix;
    
    output.pT1_lep = pTMean1_lep.getVal();
    output.pT2_lep = pTMean2_lep.getVal();
    output.pTErr1_lep = pTMean1_lep.getError();
    output.pTErr2_lep = pTMean2_lep.getError();
/*
    if (input.nFsr >= 1) {

       output.pT1_gamma = pTMean1_gamma.getVal();
       output.pTErr1_gamma = pTMean1_gamma.getError();
    
       }

    if (input.nFsr == 2) {

       output.pT2_gamma = pTMean2_gamma.getVal();
       output.pTErr2_gamma = pTMean2_gamma.getError();

       }
*/
    delete rastmp;
    delete pTs;
    delete PDFRelBW;
    delete mZ;
    delete RelBW;
    delete RelBWxCB;
    delete gauss;
    delete RelBWxCBxgauss;
    delete PDFRelBWxCBxgauss;
}
int toeplitz_test(ScalarType epsilon)
{
    std::size_t TOEPLITZ_SIZE = 47;
    viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz1(TOEPLITZ_SIZE, TOEPLITZ_SIZE);
    viennacl::toeplitz_matrix<ScalarType> vcl_toeplitz2(TOEPLITZ_SIZE, TOEPLITZ_SIZE);

    viennacl::vector<ScalarType> vcl_input(TOEPLITZ_SIZE);
    viennacl::vector<ScalarType> vcl_result(TOEPLITZ_SIZE);

    std::vector<ScalarType> input_ref(TOEPLITZ_SIZE);
    std::vector<ScalarType> result_ref(TOEPLITZ_SIZE);

    dense_matrix<ScalarType> m1(TOEPLITZ_SIZE, TOEPLITZ_SIZE);
    dense_matrix<ScalarType> m2(TOEPLITZ_SIZE, TOEPLITZ_SIZE);

    for (std::size_t i = 0; i < TOEPLITZ_SIZE; i++)
      for (std::size_t j = 0; j < TOEPLITZ_SIZE; j++)
      {
        m1(i,j) = static_cast<ScalarType>(i) - static_cast<ScalarType>(j);
        m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1);
      }

    for (std::size_t i = 0; i < TOEPLITZ_SIZE; i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_toeplitz1);
    viennacl::copy(m2, vcl_toeplitz2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_toeplitz1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Matrix addition:
    //
    vcl_toeplitz1 += vcl_toeplitz2;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
        m1(i,j) += m2(i,j);

    viennacl::copy(vcl_toeplitz1, m2);
    std::cout << "Matrix Addition: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    //
    // Per-Element access:
    //
    vcl_toeplitz1(2,4) = 42;

    for (std::size_t i=0; i<m1.size1(); ++i)    //reference calculation
    {
      if (i + 2 < m1.size2())
        m1(i, i+2) = 42;
    }

    viennacl::copy(vcl_toeplitz1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
int hankel_test(ScalarType epsilon)
{
    std::size_t HANKEL_SIZE = 7;
    viennacl::hankel_matrix<ScalarType> vcl_hankel1(HANKEL_SIZE, HANKEL_SIZE);
    viennacl::hankel_matrix<ScalarType> vcl_hankel2(HANKEL_SIZE, HANKEL_SIZE);

    viennacl::vector<ScalarType> vcl_input(HANKEL_SIZE);
    viennacl::vector<ScalarType> vcl_result(HANKEL_SIZE);

    std::vector<ScalarType> input_ref(HANKEL_SIZE);
    std::vector<ScalarType> result_ref(HANKEL_SIZE);

    dense_matrix<ScalarType> m1(vcl_hankel1.size1(), vcl_hankel1.size2());
    dense_matrix<ScalarType> m2(m1.size1(), m1.size2());

    for (std::size_t i = 0; i < m1.size1(); i++)
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        m1(i,j) = static_cast<ScalarType>((i + j) % (2 * m1.size1()));
        m2(i,j) = m1(i,j) * m1(i,j) + ScalarType(1);
      }

    for (std::size_t i = 0; i < input_ref.size(); i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_hankel1);
    viennacl::copy(m2, vcl_hankel2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_hankel1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Matrix addition:
    //
    vcl_hankel1 += vcl_hankel2;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
        m1(i,j) += m2(i,j);

    viennacl::copy(vcl_hankel1, m2);
    std::cout << "Matrix Addition: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    //
    // Per-Element access:
    //
    vcl_hankel1(4,2) = 42;

    for (std::size_t i = 0; i < m1.size1(); i++)    //reference calculation
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        if ((i + j) % (2*m1.size1()) == 6)
          m1(i, j) = 42;
      }

    viennacl::copy(vcl_hankel1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Beispiel #8
0
void matrixTest()
{
	Matrix m1(3, 3);
	m1(1, 1) = 1;
	m1(1, 2) = 2;
	m1(1, 3) = 3;
	m1(2, 1) = 4;
	m1(2, 2) = 5;
	m1(2, 3) = 6;
	m1(3, 1) = 7;
	m1(3, 2) = 8;
	m1(3, 3) = 9;
	Vektor v(3);
	v(1) = 2;
	v(2) = 3;
	v(3) = 1;

	Vektor r(3);
	r = m1 * v;

	const Vektor cv(r);

	Vektor vexpected(3);
	vexpected(1) = 11;
	vexpected(2) = 29;
	vexpected(3) = 47;

	r.subtrahieren(vexpected);
	TEST(r.betrag() < 0.0001);

	TEST(fabs(cv(1) - vexpected(1)) < 0.0001);

	const Matrix cm(m1);
	TEST(fabs(m1(1, 1) - cm(1, 1)) < 0.0001);

	Matrix m2(3, 2);
	m2(1, 1) = 1;
	m2(1, 2) = 2;
	m2(2, 1) = 4;
	m2(2, 2) = 5;
	m2(3, 1) = 7;
	m2(3, 2) = 8;

	Matrix mr(m1 * m2);

	Matrix mexpected(3, 2);
	mexpected(1, 1) = 30;
	mexpected(1, 2) = 36;
	mexpected(2, 1) = 66;
	mexpected(2, 2) = 81;
	mexpected(3, 1) = 102;
	mexpected(3, 2) = 126;

	int i, j;
	for (i = 1; i < 3; ++i)
		for (j = 1; j < 2; ++j)
		{
			TEST(fabs(mr(i, j) - mexpected(i, j)) < 0.0001);
		}

	try
	{
		mr(3, 3);
		falsch();
	}
	catch (float nan)
	{
		if (nan == nan)
		{
			falsch();
		}
	}

	try
	{
		cv(7);
		falsch();
	}
	catch (float nan)
	{
		if (nan == nan)
		{
			falsch();
		}
	}

	TEST(fabs(v.skalarprodukt(vexpected) - 156) < 0.0001);
}
Beispiel #9
0
template<typename Scalar> void sparse_solvers(int rows, int cols)
{
  double density = std::max(8./(rows*cols), 0.01);
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
  typedef Matrix<Scalar,Dynamic,1> DenseVector;
  // Scalar eps = 1e-6;

  DenseVector vec1 = DenseVector::Random(rows);

  std::vector<Vector2i> zeroCoords;
  std::vector<Vector2i> nonzeroCoords;

  // test triangular solver
  {
    DenseVector vec2 = vec1, vec3 = vec1;
    SparseMatrix<Scalar> m2(rows, cols);
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);

    // lower - dense
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.template triangularView<Lower>().solve(vec2),
                     m2.template triangularView<Lower>().solve(vec3));

    // upper - dense
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.template triangularView<Upper>().solve(vec2),
                     m2.template triangularView<Upper>().solve(vec3));

    // lower - transpose
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.transpose().template triangularView<Upper>().solve(vec2),
                     m2.transpose().template triangularView<Upper>().solve(vec3));

    // upper - transpose
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.transpose().template triangularView<Lower>().solve(vec2),
                     m2.transpose().template triangularView<Lower>().solve(vec3));

    SparseMatrix<Scalar> matB(rows, rows);
    DenseMatrix refMatB = DenseMatrix::Zero(rows, rows);

    // lower - sparse
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular);
    initSparse<Scalar>(density, refMatB, matB);
    refMat2.template triangularView<Lower>().solveInPlace(refMatB);
    m2.template triangularView<Lower>().solveInPlace(matB);
    VERIFY_IS_APPROX(matB.toDense(), refMatB);

    // upper - sparse
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeUpperTriangular);
    initSparse<Scalar>(density, refMatB, matB);
    refMat2.template triangularView<Upper>().solveInPlace(refMatB);
    m2.template triangularView<Upper>().solveInPlace(matB);
    VERIFY_IS_APPROX(matB, refMatB);

    // test deprecated API
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag|MakeLowerTriangular, &zeroCoords, &nonzeroCoords);
    VERIFY_IS_APPROX(refMat2.template triangularView<Lower>().solve(vec2),
                     m2.template triangularView<Lower>().solve(vec3));
  }
}
  void SaPFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::BuildP(Level &fineLevel, Level &coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);

    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

    // Get default tentative prolongator factory
    // Getting it that way ensure that the same factory instance will be used for both SaPFactory and NullspaceFactory.
    // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
    RCP<const FactoryBase> initialPFact = GetFactory("P");
    if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }

    // Level Get
    RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());

    if(restrictionMode_) {
      SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);
      A = Utils2::Transpose(*A, true); // build transpose of A explicitely
    }

    //Build final prolongator
    RCP<Matrix> finalP; // output

    //FIXME Xpetra::Matrix should calculate/stash max eigenvalue
    //FIXME SC lambdaMax = A->GetDinvALambda();

    const ParameterList & pL = GetParameterList();
    Scalar dampingFactor = pL.get<Scalar>("Damping factor");
    if (dampingFactor != Teuchos::ScalarTraits<Scalar>::zero()) {

      //Teuchos::ParameterList matrixList;
      //RCP<Matrix> I = MueLu::Gallery::CreateCrsMatrix<SC, LO, GO, Map, CrsMatrixWrap>("Identity", Get< RCP<Matrix> >(fineLevel, "A")->getRowMap(), matrixList);
      //RCP<Matrix> newPtent = Utils::TwoMatrixMultiply(I, false, Ptent, false);
      //Ptent = newPtent; //I tried a checkout of the original Ptent, and it seems to be gone now (which is good)

      Scalar lambdaMax;
      {
        SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
        lambdaMax = A->GetMaxEigenvalueEstimate();
        if (lambdaMax == -Teuchos::ScalarTraits<SC>::one()) {
          GetOStream(Statistics1, 0) << "Calculating max eigenvalue estimate now" << std::endl;
          Magnitude stopTol = 1e-4;
          lambdaMax = Utils::PowerMethod(*A, true, (LO) 10, stopTol);
          A->SetMaxEigenvalueEstimate(lambdaMax);
        } else {
          GetOStream(Statistics1, 0) << "Using cached max eigenvalue estimate" << std::endl;
        }
        GetOStream(Statistics0, 0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
      }

      {
        SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
        Teuchos::RCP<Vector> invDiag = Utils::GetMatrixDiagonalInverse(*A);

	SC omega = dampingFactor / lambdaMax;
	finalP=Utils::Jacobi(omega,*invDiag,*A, *Ptent, finalP,GetOStream(Statistics2,0));
      }

    } else {
      finalP = Ptent;
    }

    // Level Set
    if (!restrictionMode_) {
      // prolongation factory is in prolongation mode
      Set(coarseLevel, "P", finalP);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        finalP->CreateView("stridedMaps", Ptent);

    } else {
      // prolongation factory is in restriction mode
      RCP<Matrix> R = Utils2::Transpose(*finalP, true); // use Utils2 -> specialization for double
      Set(coarseLevel, "R", R);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        R->CreateView("stridedMaps", Ptent, true);
    }

    RCP<ParameterList> params = rcp(new ParameterList());
    params->set("printLoadBalancingInfo", true);
    GetOStream(Statistics1,0) << Utils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);

  } //Build()
Beispiel #11
0
template<typename Scalar> void sparse_lu(int rows, int cols)
{
    double density = std::max(8./(rows*cols), 0.01);
    typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
    typedef Matrix<Scalar,Dynamic,1> DenseVector;

    DenseVector vec1 = DenseVector::Random(rows);

    std::vector<Vector2i> zeroCoords;
    std::vector<Vector2i> nonzeroCoords;

    SparseMatrix<Scalar> m2(rows, cols);
    DenseMatrix refMat2(rows, cols);

    DenseVector b = DenseVector::Random(cols);
    DenseVector refX(cols), x(cols);

    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag, &zeroCoords, &nonzeroCoords);

    FullPivLU<DenseMatrix> refLu(refMat2);
    refX = refLu.solve(b);
#if defined(EIGEN_SUPERLU_SUPPORT) || defined(EIGEN_UMFPACK_SUPPORT)
    Scalar refDet = refLu.determinant();
#endif
    x.setZero();
    // // SparseLU<SparseMatrix<Scalar> > (m2).solve(b,&x);
    // // VERIFY(refX.isApprox(x,test_precision<Scalar>()) && "LU: default");

#ifdef EIGEN_UMFPACK_SUPPORT
    {
        // check solve
        x.setZero();
        SparseLU<SparseMatrix<Scalar>,UmfPack> lu(m2);
        VERIFY(lu.succeeded() && "umfpack LU decomposition failed");
        VERIFY(lu.solve(b,&x) && "umfpack LU solving failed");
        VERIFY(refX.isApprox(x,test_precision<Scalar>()) && "LU: umfpack");
        VERIFY_IS_APPROX(refDet,lu.determinant());
        // TODO check the extracted data
        //std::cerr << slu.matrixL() << "\n";
    }
#endif

#ifdef EIGEN_SUPERLU_SUPPORT
    {
        x.setZero();
        SparseLU<SparseMatrix<Scalar>,SuperLU> slu(m2);
        if (slu.succeeded())
        {
            if (slu.solve(b,&x)) {
                VERIFY(refX.isApprox(x,test_precision<Scalar>()) && "LU: SuperLU");
            }
            // std::cerr << refDet << " == " << slu.determinant() << "\n";
            if (slu.solve(b, &x, SvTranspose)) {
                VERIFY(b.isApprox(m2.transpose() * x, test_precision<Scalar>()));
            }

            if (slu.solve(b, &x, SvAdjoint)) {
                VERIFY(b.isApprox(m2.adjoint() * x, test_precision<Scalar>()));
            }

            if (!NumTraits<Scalar>::IsComplex) {
                VERIFY_IS_APPROX(refDet,slu.determinant()); // FIXME det is not very stable for complex
            }
        }
    }
#endif

}
Beispiel #12
0
template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& ref)
{
  typedef typename SparseMatrixType::StorageIndex StorageIndex;
  typedef Matrix<StorageIndex,2,1> Vector2;
  
  const Index rows = ref.rows();
  const Index cols = ref.cols();
  //const Index inner = ref.innerSize();
  //const Index outer = ref.outerSize();

  typedef typename SparseMatrixType::Scalar Scalar;
  enum { Flags = SparseMatrixType::Flags };

  double density = (std::max)(8./(rows*cols), 0.01);
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
  typedef Matrix<Scalar,Dynamic,1> DenseVector;
  Scalar eps = 1e-6;

  Scalar s1 = internal::random<Scalar>();
  {
    SparseMatrixType m(rows, cols);
    DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
    DenseVector vec1 = DenseVector::Random(rows);

    std::vector<Vector2> zeroCoords;
    std::vector<Vector2> nonzeroCoords;
    initSparse<Scalar>(density, refMat, m, 0, &zeroCoords, &nonzeroCoords);

    // test coeff and coeffRef
    for (std::size_t i=0; i<zeroCoords.size(); ++i)
    {
      VERIFY_IS_MUCH_SMALLER_THAN( m.coeff(zeroCoords[i].x(),zeroCoords[i].y()), eps );
      if(internal::is_same<SparseMatrixType,SparseMatrix<Scalar,Flags> >::value)
        VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[i].x(),zeroCoords[i].y()) = 5 );
    }
    VERIFY_IS_APPROX(m, refMat);

    if(!nonzeroCoords.empty()) {
      m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
      refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
    }

    VERIFY_IS_APPROX(m, refMat);

      // test assertion
      VERIFY_RAISES_ASSERT( m.coeffRef(-1,1) = 0 );
      VERIFY_RAISES_ASSERT( m.coeffRef(0,m.cols()) = 0 );
    }

    // test insert (inner random)
    {
      DenseMatrix m1(rows,cols);
      m1.setZero();
      SparseMatrixType m2(rows,cols);
      bool call_reserve = internal::random<int>()%2;
      Index nnz = internal::random<int>(1,int(rows)/2);
      if(call_reserve)
      {
        if(internal::random<int>()%2)
          m2.reserve(VectorXi::Constant(m2.outerSize(), int(nnz)));
        else
          m2.reserve(m2.outerSize() * nnz);
      }
      g_realloc_count = 0;
      for (Index j=0; j<cols; ++j)
      {
        for (Index k=0; k<nnz; ++k)
        {
          Index i = internal::random<Index>(0,rows-1);
          if (m1.coeff(i,j)==Scalar(0))
            m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
        }
      }
      
      if(call_reserve && !SparseMatrixType::IsRowMajor)
      {
        VERIFY(g_realloc_count==0);
      }
      
      m2.finalize();
      VERIFY_IS_APPROX(m2,m1);
    }

    // test insert (fully random)
    {
      DenseMatrix m1(rows,cols);
      m1.setZero();
      SparseMatrixType m2(rows,cols);
      if(internal::random<int>()%2)
        m2.reserve(VectorXi::Constant(m2.outerSize(), 2));
      for (int k=0; k<rows*cols; ++k)
      {
        Index i = internal::random<Index>(0,rows-1);
        Index j = internal::random<Index>(0,cols-1);
        if ((m1.coeff(i,j)==Scalar(0)) && (internal::random<int>()%2))
          m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
        else
        {
          Scalar v = internal::random<Scalar>();
          m2.coeffRef(i,j) += v;
          m1(i,j) += v;
        }
      }
      VERIFY_IS_APPROX(m2,m1);
    }
    
    // test insert (un-compressed)
    for(int mode=0;mode<4;++mode)
    {
      DenseMatrix m1(rows,cols);
      m1.setZero();
      SparseMatrixType m2(rows,cols);
      VectorXi r(VectorXi::Constant(m2.outerSize(), ((mode%2)==0) ? int(m2.innerSize()) : std::max<int>(1,int(m2.innerSize())/8)));
      m2.reserve(r);
      for (Index k=0; k<rows*cols; ++k)
      {
        Index i = internal::random<Index>(0,rows-1);
        Index j = internal::random<Index>(0,cols-1);
        if (m1.coeff(i,j)==Scalar(0))
          m2.insert(i,j) = m1(i,j) = internal::random<Scalar>();
        if(mode==3)
          m2.reserve(r);
      }
      if(internal::random<int>()%2)
        m2.makeCompressed();
      VERIFY_IS_APPROX(m2,m1);
    }

  // test basic computations
  {
    DenseMatrix refM1 = DenseMatrix::Zero(rows, cols);
    DenseMatrix refM2 = DenseMatrix::Zero(rows, cols);
    DenseMatrix refM3 = DenseMatrix::Zero(rows, cols);
    DenseMatrix refM4 = DenseMatrix::Zero(rows, cols);
    SparseMatrixType m1(rows, cols);
    SparseMatrixType m2(rows, cols);
    SparseMatrixType m3(rows, cols);
    SparseMatrixType m4(rows, cols);
    initSparse<Scalar>(density, refM1, m1);
    initSparse<Scalar>(density, refM2, m2);
    initSparse<Scalar>(density, refM3, m3);
    initSparse<Scalar>(density, refM4, m4);

    if(internal::random<bool>())
      m1.makeCompressed();

    VERIFY_IS_APPROX(m1*s1, refM1*s1);
    VERIFY_IS_APPROX(m1+m2, refM1+refM2);
    VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3);
    VERIFY_IS_APPROX(m3.cwiseProduct(m1+m2), refM3.cwiseProduct(refM1+refM2));
    VERIFY_IS_APPROX(m1*s1-m2, refM1*s1-refM2);

    if(SparseMatrixType::IsRowMajor)
      VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.row(0)), refM1.row(0).dot(refM2.row(0)));
    else
      VERIFY_IS_APPROX(m1.innerVector(0).dot(refM2.col(0)), refM1.col(0).dot(refM2.col(0)));
    
    DenseVector rv = DenseVector::Random(m1.cols());
    DenseVector cv = DenseVector::Random(m1.rows());
    Index r = internal::random<Index>(0,m1.rows()-2);
    Index c = internal::random<Index>(0,m1.cols()-1);
    VERIFY_IS_APPROX(( m1.template block<1,Dynamic>(r,0,1,m1.cols()).dot(rv)) , refM1.row(r).dot(rv));
    VERIFY_IS_APPROX(m1.row(r).dot(rv), refM1.row(r).dot(rv));
    VERIFY_IS_APPROX(m1.col(c).dot(cv), refM1.col(c).dot(cv));

    VERIFY_IS_APPROX(m1.conjugate(), refM1.conjugate());
    VERIFY_IS_APPROX(m1.real(), refM1.real());

    refM4.setRandom();
    // sparse cwise* dense
    VERIFY_IS_APPROX(m3.cwiseProduct(refM4), refM3.cwiseProduct(refM4));
    // dense cwise* sparse
    VERIFY_IS_APPROX(refM4.cwiseProduct(m3), refM4.cwiseProduct(refM3));
//     VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4);

    VERIFY_IS_APPROX(refM4 + m3, refM4 + refM3);
    VERIFY_IS_APPROX(m3 + refM4, refM3 + refM4);
    VERIFY_IS_APPROX(refM4 - m3, refM4 - refM3);
    VERIFY_IS_APPROX(m3 - refM4, refM3 - refM4);

    VERIFY_IS_APPROX(m1.sum(), refM1.sum());

    VERIFY_IS_APPROX(m1*=s1, refM1*=s1);
    VERIFY_IS_APPROX(m1/=s1, refM1/=s1);

    VERIFY_IS_APPROX(m1+=m2, refM1+=refM2);
    VERIFY_IS_APPROX(m1-=m2, refM1-=refM2);

    // test aliasing
    VERIFY_IS_APPROX((m1 = -m1), (refM1 = -refM1));
    VERIFY_IS_APPROX((m1 = m1.transpose()), (refM1 = refM1.transpose().eval()));
    VERIFY_IS_APPROX((m1 = -m1.transpose()), (refM1 = -refM1.transpose().eval()));
    VERIFY_IS_APPROX((m1 += -m1), (refM1 += -refM1));

    if(m1.isCompressed())
    {
      VERIFY_IS_APPROX(m1.coeffs().sum(), m1.sum());
      m1.coeffs() += s1;
      for(Index j = 0; j<m1.outerSize(); ++j)
        for(typename SparseMatrixType::InnerIterator it(m1,j); it; ++it)
          refM1(it.row(), it.col()) += s1;
      VERIFY_IS_APPROX(m1, refM1);
    }

    // and/or
    {
      typedef SparseMatrix<bool, SparseMatrixType::Options, typename SparseMatrixType::StorageIndex> SpBool;
      SpBool mb1 = m1.real().template cast<bool>();
      SpBool mb2 = m2.real().template cast<bool>();
      VERIFY_IS_EQUAL(mb1.template cast<int>().sum(), refM1.real().template cast<bool>().count());
      VERIFY_IS_EQUAL((mb1 && mb2).template cast<int>().sum(), (refM1.real().template cast<bool>() && refM2.real().template cast<bool>()).count());
      VERIFY_IS_EQUAL((mb1 || mb2).template cast<int>().sum(), (refM1.real().template cast<bool>() || refM2.real().template cast<bool>()).count());
      SpBool mb3 = mb1 && mb2;
      if(mb1.coeffs().all() && mb2.coeffs().all())
      {
        VERIFY_IS_EQUAL(mb3.nonZeros(), (refM1.real().template cast<bool>() && refM2.real().template cast<bool>()).count());
      }
    }
  }

  // test reverse iterators
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
    SparseMatrixType m2(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    std::vector<Scalar> ref_value(m2.innerSize());
    std::vector<Index> ref_index(m2.innerSize());
    if(internal::random<bool>())
      m2.makeCompressed();
    for(Index j = 0; j<m2.outerSize(); ++j)
    {
      Index count_forward = 0;

      for(typename SparseMatrixType::InnerIterator it(m2,j); it; ++it)
      {
        ref_value[ref_value.size()-1-count_forward] = it.value();
        ref_index[ref_index.size()-1-count_forward] = it.index();
        count_forward++;
      }
      Index count_reverse = 0;
      for(typename SparseMatrixType::ReverseInnerIterator it(m2,j); it; --it)
      {
        VERIFY_IS_APPROX( std::abs(ref_value[ref_value.size()-count_forward+count_reverse])+1, std::abs(it.value())+1);
        VERIFY_IS_EQUAL( ref_index[ref_index.size()-count_forward+count_reverse] , it.index());
        count_reverse++;
      }
      VERIFY_IS_EQUAL(count_forward, count_reverse);
    }
  }

  // test transpose
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
    SparseMatrixType m2(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval());
    VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose());

    VERIFY_IS_APPROX(SparseMatrixType(m2.adjoint()), refMat2.adjoint());
    
    // check isApprox handles opposite storage order
    typename Transpose<SparseMatrixType>::PlainObject m3(m2);
    VERIFY(m2.isApprox(m3));
  }

  // test prune
  {
    SparseMatrixType m2(rows, cols);
    DenseMatrix refM2(rows, cols);
    refM2.setZero();
    int countFalseNonZero = 0;
    int countTrueNonZero = 0;
    m2.reserve(VectorXi::Constant(m2.outerSize(), int(m2.innerSize())));
    for (Index j=0; j<m2.cols(); ++j)
    {
      for (Index i=0; i<m2.rows(); ++i)
      {
        float x = internal::random<float>(0,1);
        if (x<0.1f)
        {
          // do nothing
        }
        else if (x<0.5f)
        {
          countFalseNonZero++;
          m2.insert(i,j) = Scalar(0);
        }
        else
        {
          countTrueNonZero++;
          m2.insert(i,j) = Scalar(1);
          refM2(i,j) = Scalar(1);
        }
      }
    }
    if(internal::random<bool>())
      m2.makeCompressed();
    VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros());
    if(countTrueNonZero>0)
      VERIFY_IS_APPROX(m2, refM2);
    m2.prune(Scalar(1));
    VERIFY(countTrueNonZero==m2.nonZeros());
    VERIFY_IS_APPROX(m2, refM2);
  }

  // test setFromTriplets
  {
    typedef Triplet<Scalar,StorageIndex> TripletType;
    std::vector<TripletType> triplets;
    Index ntriplets = rows*cols;
    triplets.reserve(ntriplets);
    DenseMatrix refMat_sum  = DenseMatrix::Zero(rows,cols);
    DenseMatrix refMat_prod = DenseMatrix::Zero(rows,cols);
    DenseMatrix refMat_last = DenseMatrix::Zero(rows,cols);

    for(Index i=0;i<ntriplets;++i)
    {
      StorageIndex r = internal::random<StorageIndex>(0,StorageIndex(rows-1));
      StorageIndex c = internal::random<StorageIndex>(0,StorageIndex(cols-1));
      Scalar v = internal::random<Scalar>();
      triplets.push_back(TripletType(r,c,v));
      refMat_sum(r,c) += v;
      if(std::abs(refMat_prod(r,c))==0)
        refMat_prod(r,c) = v;
      else
        refMat_prod(r,c) *= v;
      refMat_last(r,c) = v;
    }
    SparseMatrixType m(rows,cols);
    m.setFromTriplets(triplets.begin(), triplets.end());
    VERIFY_IS_APPROX(m, refMat_sum);

    m.setFromTriplets(triplets.begin(), triplets.end(), std::multiplies<Scalar>());
    VERIFY_IS_APPROX(m, refMat_prod);
#if (defined(__cplusplus) && __cplusplus >= 201103L)
    m.setFromTriplets(triplets.begin(), triplets.end(), [] (Scalar,Scalar b) { return b; });
    VERIFY_IS_APPROX(m, refMat_last);
#endif
  }
  
  // test Map
  {
    DenseMatrix refMat2(rows, cols), refMat3(rows, cols);
    SparseMatrixType m2(rows, cols), m3(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    initSparse<Scalar>(density, refMat3, m3);
    {
      Map<SparseMatrixType> mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr());
      Map<SparseMatrixType> mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr());
      VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3);
      VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3);
    }
    {
      MappedSparseMatrix<Scalar,SparseMatrixType::Options,StorageIndex> mapMat2(m2.rows(), m2.cols(), m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(), m2.innerNonZeroPtr());
      MappedSparseMatrix<Scalar,SparseMatrixType::Options,StorageIndex> mapMat3(m3.rows(), m3.cols(), m3.nonZeros(), m3.outerIndexPtr(), m3.innerIndexPtr(), m3.valuePtr(), m3.innerNonZeroPtr());
      VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3);
      VERIFY_IS_APPROX(mapMat2+mapMat3, refMat2+refMat3);
    }

    Index i = internal::random<Index>(0,rows-1);
    Index j = internal::random<Index>(0,cols-1);
    m2.coeffRef(i,j) = 123;
    if(internal::random<bool>())
      m2.makeCompressed();
    Map<SparseMatrixType> mapMat2(rows, cols, m2.nonZeros(), m2.outerIndexPtr(), m2.innerIndexPtr(), m2.valuePtr(),  m2.innerNonZeroPtr());
    VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(123));
    VERIFY_IS_EQUAL(mapMat2.coeff(i,j),Scalar(123));
    mapMat2.coeffRef(i,j) = -123;
    VERIFY_IS_EQUAL(m2.coeff(i,j),Scalar(-123));
  }

  // test triangularView
  {
    DenseMatrix refMat2(rows, cols), refMat3(rows, cols);
    SparseMatrixType m2(rows, cols), m3(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    refMat3 = refMat2.template triangularView<Lower>();
    m3 = m2.template triangularView<Lower>();
    VERIFY_IS_APPROX(m3, refMat3);

    refMat3 = refMat2.template triangularView<Upper>();
    m3 = m2.template triangularView<Upper>();
    VERIFY_IS_APPROX(m3, refMat3);

    {
      refMat3 = refMat2.template triangularView<UnitUpper>();
      m3 = m2.template triangularView<UnitUpper>();
      VERIFY_IS_APPROX(m3, refMat3);

      refMat3 = refMat2.template triangularView<UnitLower>();
      m3 = m2.template triangularView<UnitLower>();
      VERIFY_IS_APPROX(m3, refMat3);
    }

    refMat3 = refMat2.template triangularView<StrictlyUpper>();
    m3 = m2.template triangularView<StrictlyUpper>();
    VERIFY_IS_APPROX(m3, refMat3);

    refMat3 = refMat2.template triangularView<StrictlyLower>();
    m3 = m2.template triangularView<StrictlyLower>();
    VERIFY_IS_APPROX(m3, refMat3);

    // check sparse-traingular to dense
    refMat3 = m2.template triangularView<StrictlyUpper>();
    VERIFY_IS_APPROX(refMat3, DenseMatrix(refMat2.template triangularView<StrictlyUpper>()));
  }
  
  // test selfadjointView
  if(!SparseMatrixType::IsRowMajor)
  {
    DenseMatrix refMat2(rows, rows), refMat3(rows, rows);
    SparseMatrixType m2(rows, rows), m3(rows, rows);
    initSparse<Scalar>(density, refMat2, m2);
    refMat3 = refMat2.template selfadjointView<Lower>();
    m3 = m2.template selfadjointView<Lower>();
    VERIFY_IS_APPROX(m3, refMat3);

    refMat3 += refMat2.template selfadjointView<Lower>();
    m3 += m2.template selfadjointView<Lower>();
    VERIFY_IS_APPROX(m3, refMat3);

    refMat3 -= refMat2.template selfadjointView<Lower>();
    m3 -= m2.template selfadjointView<Lower>();
    VERIFY_IS_APPROX(m3, refMat3);

    // selfadjointView only works for square matrices:
    SparseMatrixType m4(rows, rows+1);
    VERIFY_RAISES_ASSERT(m4.template selfadjointView<Lower>());
    VERIFY_RAISES_ASSERT(m4.template selfadjointView<Upper>());
  }
  
  // test sparseView
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
    SparseMatrixType m2(rows, rows);
    initSparse<Scalar>(density, refMat2, m2);
    VERIFY_IS_APPROX(m2.eval(), refMat2.sparseView().eval());

    // sparse view on expressions:
    VERIFY_IS_APPROX((s1*m2).eval(), (s1*refMat2).sparseView().eval());
    VERIFY_IS_APPROX((m2+m2).eval(), (refMat2+refMat2).sparseView().eval());
    VERIFY_IS_APPROX((m2*m2).eval(), (refMat2.lazyProduct(refMat2)).sparseView().eval());
    VERIFY_IS_APPROX((m2*m2).eval(), (refMat2*refMat2).sparseView().eval());
  }

  // test diagonal
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
    SparseMatrixType m2(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    VERIFY_IS_APPROX(m2.diagonal(), refMat2.diagonal().eval());
    VERIFY_IS_APPROX(const_cast<const SparseMatrixType&>(m2).diagonal(), refMat2.diagonal().eval());
    
    initSparse<Scalar>(density, refMat2, m2, ForceNonZeroDiag);
    m2.diagonal()      += refMat2.diagonal();
    refMat2.diagonal() += refMat2.diagonal();
    VERIFY_IS_APPROX(m2, refMat2);
  }
  
  // test diagonal to sparse
  {
    DenseVector d = DenseVector::Random(rows);
    DenseMatrix refMat2 = d.asDiagonal();
    SparseMatrixType m2(rows, rows);
    m2 = d.asDiagonal();
    VERIFY_IS_APPROX(m2, refMat2);
    SparseMatrixType m3(d.asDiagonal());
    VERIFY_IS_APPROX(m3, refMat2);
    refMat2 += d.asDiagonal();
    m2 += d.asDiagonal();
    VERIFY_IS_APPROX(m2, refMat2);
  }
  
  // test conservative resize
  {
      std::vector< std::pair<StorageIndex,StorageIndex> > inc;
      if(rows > 3 && cols > 2)
        inc.push_back(std::pair<StorageIndex,StorageIndex>(-3,-2));
      inc.push_back(std::pair<StorageIndex,StorageIndex>(0,0));
      inc.push_back(std::pair<StorageIndex,StorageIndex>(3,2));
      inc.push_back(std::pair<StorageIndex,StorageIndex>(3,0));
      inc.push_back(std::pair<StorageIndex,StorageIndex>(0,3));
      
      for(size_t i = 0; i< inc.size(); i++) {
        StorageIndex incRows = inc[i].first;
        StorageIndex incCols = inc[i].second;
        SparseMatrixType m1(rows, cols);
        DenseMatrix refMat1 = DenseMatrix::Zero(rows, cols);
        initSparse<Scalar>(density, refMat1, m1);
        
        m1.conservativeResize(rows+incRows, cols+incCols);
        refMat1.conservativeResize(rows+incRows, cols+incCols);
        if (incRows > 0) refMat1.bottomRows(incRows).setZero();
        if (incCols > 0) refMat1.rightCols(incCols).setZero();
        
        VERIFY_IS_APPROX(m1, refMat1);
        
        // Insert new values
        if (incRows > 0) 
          m1.insert(m1.rows()-1, 0) = refMat1(refMat1.rows()-1, 0) = 1;
        if (incCols > 0) 
          m1.insert(0, m1.cols()-1) = refMat1(0, refMat1.cols()-1) = 1;
          
        VERIFY_IS_APPROX(m1, refMat1);
          
          
      }
  }

  // test Identity matrix
  {
    DenseMatrix refMat1 = DenseMatrix::Identity(rows, rows);
    SparseMatrixType m1(rows, rows);
    m1.setIdentity();
    VERIFY_IS_APPROX(m1, refMat1);
    for(int k=0; k<rows*rows/4; ++k)
    {
      Index i = internal::random<Index>(0,rows-1);
      Index j = internal::random<Index>(0,rows-1);
      Scalar v = internal::random<Scalar>();
      m1.coeffRef(i,j) = v;
      refMat1.coeffRef(i,j) = v;
      VERIFY_IS_APPROX(m1, refMat1);
      if(internal::random<Index>(0,10)<2)
        m1.makeCompressed();
    }
    m1.setIdentity();
    refMat1.setIdentity();
    VERIFY_IS_APPROX(m1, refMat1);
  }

  // test array/vector of InnerIterator
  {
    typedef typename SparseMatrixType::InnerIterator IteratorType;

    DenseMatrix refMat2 = DenseMatrix::Zero(rows, cols);
    SparseMatrixType m2(rows, cols);
    initSparse<Scalar>(density, refMat2, m2);
    IteratorType static_array[2];
    static_array[0] = IteratorType(m2,0);
    static_array[1] = IteratorType(m2,m2.outerSize()-1);
    VERIFY( static_array[0] || m2.innerVector(static_array[0].outer()).nonZeros() == 0 );
    VERIFY( static_array[1] || m2.innerVector(static_array[1].outer()).nonZeros() == 0 );
    if(static_array[0] && static_array[1])
    {
      ++(static_array[1]);
      static_array[1] = IteratorType(m2,0);
      VERIFY( static_array[1] );
      VERIFY( static_array[1].index() == static_array[0].index() );
      VERIFY( static_array[1].outer() == static_array[0].outer() );
      VERIFY( static_array[1].value() == static_array[0].value() );
    }

    std::vector<IteratorType> iters(2);
    iters[0] = IteratorType(m2,0);
    iters[1] = IteratorType(m2,m2.outerSize()-1);
  }
}
Beispiel #13
0
int main(int argc, char* argv[]) {
    constexpr float l = -0.1f;
    constexpr float r =  0.1f;
    constexpr float b = -0.1f;
    constexpr float t =  0.1f;

    constexpr float dist = 0.1f;

    constexpr int SAMPLES   = 64;

    const int SAMPLEDIM = sqrt(SAMPLES);

#ifndef USE_OPENGL
    ppm = easyppm_create(NX, NY, IMAGETYPE_PPM);
#endif

    // Materials
    Material mp(Color(0.2f, 0.2f, 0.2f), Color(1.0f, 1.0f, 1.0f), Color(0.0f, 0.0f, 0.0f),  0.0f, 0.5f);
    Material m1(Color(0.2f, 0.0f, 0.0f), Color(1.0f, 0.0f, 0.0f), Color(0.0f, 0.0f, 0.0f),  0.0f, 0.0f);
    Material m2(Color(0.0f, 0.2f, 0.0f), Color(0.0f, 0.5f, 0.0f), Color(0.5f, 0.5f, 0.5f), 32.0f, 0.0f);
    Material m3(Color(0.0f, 0.0f, 0.2f), Color(0.0f, 0.0f, 1.0f), Color(0.0f, 0.0f, 0.0f),  0.0f, 0.8f);

    // Surfaces
    SurfaceList surfaces;
    surfaces.add(std::move(std::unique_ptr<Plane>(new Plane(Eigen::Vector3f(0, -2, 0), Eigen::Vector3f(0, 1, 0), mp))));
    surfaces.add(std::move(std::unique_ptr<Sphere>(new Sphere(Eigen::Vector3f(-4, 0, -7), 1, m1))));
    surfaces.add(std::move(std::unique_ptr<Sphere>(new Sphere(Eigen::Vector3f( 0, 0, -7), 2, m2))));
    surfaces.add(std::move(std::unique_ptr<Sphere>(new Sphere(Eigen::Vector3f( 4, 0, -7), 1, m3))));

    // Light
    Light light(Eigen::Vector3f(-4, 4, -3), 1);

    // Add surfaces and light to scene
    Scene scene(surfaces, light);

    // Fill pixel buffer
    buffer = new Color[NX*NY];
    #if defined(USE_OPENMP)
        #pragma omp parallel for
    #endif
    for (int k = 0; k < NX*NY; k++) {
        int i = k % NX;
        int j = k / NX;
        Color res(0,0,0);
        for (int si = 0; si < SAMPLEDIM; si++) {
            for (int sj = 0; sj < SAMPLEDIM; sj++) {
                // Uniform sampling
                float x = (i-0.5f) + (si)/(float)SAMPLEDIM;
                float y = (j-0.5f) + (sj)/(float)SAMPLEDIM;

                float u = l + ((r-l)*(x+0.5f)/NX);
                float v = b + ((t-b)*(y+0.5f)/NY);

                auto p = Eigen::Vector3f(0, 0, 0);
                auto d = Eigen::Vector3f(u, v, -dist);

                auto ray = Ray(p, d);

                auto hit = std::unique_ptr<Intersection>(scene.intersect(ray));
                if (hit)
                    res += scene.shade(ray, *hit, 2, true, false).correct(2.2f);
            }
        }
        #if defined(USE_OPENGL)
            buffer[i*NY + j] = res / SAMPLES;
        #else
            auto color = res / SAMPLES;
            auto r = clamp(color.r * 255, 0, 255);
            auto g = clamp(color.g * 255, 0, 255);
            auto b = clamp(color.b * 255, 0, 255);
            easyppm_set(&ppm, i, j, easyppm_rgb(r, g, b));
        #endif
    }

    #if defined(USE_OPENGL)
        // Write buffer to OpenGL window
        glutInit(&argc, argv);
        glutInitDisplayMode(GLUT_RGB | GLUT_DEPTH | GLUT_DOUBLE);
        glutInitWindowSize(NX, NY);
        glutCreateWindow("Part 4");
        glutDisplayFunc(gl_display);
        glutKeyboardFunc(gl_keyboard);
        glutMainLoop();
    #else
        // Write buffer to image file
        const char* path = "images/part4.ppm";

        easyppm_invert_y(&ppm);
        easyppm_write(&ppm, path);
        easyppm_destroy(&ppm);

        printf("Image written to %s\n", path);
    #endif

    delete[] buffer;

    return 0;
}
Beispiel #14
0
        bool AdvancedMatrixTests()
        {
            Matrix m(1, 0, 0, 0,
                     0, 2, 0, 0,
                     0, 0, 3, 0,
                     0, 0, 0, 4);

            float trace = m.Trace();
            float expectedTrace = 1 + 2 + 3 + 4;

            if (trace != expectedTrace)
            {
                wprintf(L"trace didn't match! %f\n", trace);
                return false;
            }

            float det = m.Determinant();
            // that's a non-reflective, scaling matrix with a scale > 1, so we should have a pos det > 1
            if (det <= 1)
            {
                wprintf(L"determinant wasn't > 1! %f\n", det);
                return false;
            }

            Matrix invM;
            // we only don't have an inverse if det(M) == 0
            if (m.Inverse(&invM) != (det != 0))
            {
                wprintf(L"inverse existence didn't match determinant! %f\n", det);
                return false;
            }

            // if we had an inverse, let's verify it
            if (det != 0)
            {
                // first, m * invM == I
                Matrix testForIdentity = m * invM;
                // quick test for identiy is trace == 4 && det = 1. Not perfect, but good enough
                if (testForIdentity.Trace() != 4 || testForIdentity.Determinant() != 1)
                {
                    wprintf(L"M * invM != I!\n");
                    return false;
                }

                // second, inverting the inverse should give us the original matrix
                Matrix testM;
                if (!invM.Inverse(&testM))
                {
                    return false;
                }

                if (testM.Trace() != trace || testM.Determinant() != det)
                {
                    wprintf(L"inverting the inverse didn't appear to give the original matrix!\n");
                    return false;
                }
            }

            // inverse of an orthonormal matrix (any rotation matrix) is equivelant to it's transpose
            Matrix m2(Matrix::CreateFromAxisAngle(Vector3(0.707f, 0.707f, 0.0f), 1.234f));
            Matrix invM2;
            if (!m2.Inverse(&invM2))
            {
                return false;
            }

            Matrix transM2 = m2.Transpose();
            if (invM2.Trace() != transM2.Trace() || invM2.Determinant() != transM2.Determinant())
            {
                return false;
            }

            // another inverse test
            Matrix m3(1, 2, 0, 0,
                      4, 0, 4, 0,
                      0, 8, 8, 0,
                      1, 0, 2, 1);
            Matrix invM3;
            if (!m3.Inverse(&invM3))
            {
                return false;
            }

            DirectX::XMMATRIX xm = m3;
            DirectX::XMVECTOR xdet;
            DirectX::XMMATRIX invXM = DirectX::XMMatrixInverse(&xdet, xm);

            DirectX::XMFLOAT4X4 invXMF;
            DirectX::XMStoreFloat4x4(&invXMF, invXM);

            return true;
        }
void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
    // trw_bprop_helper2(model, psi_ij_rho, psi_i, rho, maxiter, n, m1, m2, b_i,...
    //                   dm1, dm2, dn, dpsi_i, dpsi_ij);

    int i=0;
    int nnodes     = mapdouble(mxGetField(prhs[i],0,"nnodes"))(0);
    int ncliques   = mapdouble(mxGetField(prhs[i],0,"ncliques"))(0);
    int nvals      = mapdouble(mxGetField(prhs[i],0,"nvals"))(0);
    MatrixXi pairs = mapdouble(mxGetField(prhs[i],0,"pairs")).cast<int>();
    pairs.array() -= 1;
    MatrixXi N1   = mapdouble(mxGetField(prhs[i],0,"N1")).cast<int>();
    N1.array() -= 1;
    MatrixXi N2   = mapdouble(mxGetField(prhs[i],0,"N2")).cast<int>();
    N2.array() -= 1;
    MatrixXi tree2clique  = mapdouble(mxGetField(prhs[i],0,"tree2clique")).cast<int>();
    tree2clique.array() -= 1;
    MatrixXi treeschedule = mapdouble(mxGetField(prhs[i],0,"treeschedule")).cast<int>();
    treeschedule.array() -= 1;
    i++;

    MatrixMd psi_ij  = mapdouble(prhs[i++]);
    MatrixMd psi_i   = mapdouble(prhs[i++]);
    double rho       = mapdouble(prhs[i++])(0);
    int maxiter      = mapdouble(prhs[i++])(0);
    MatrixMd n       = mapdouble(prhs[i++]);
    MatrixMd m1      = mapdouble(prhs[i++]);
    MatrixMd m2      = mapdouble(prhs[i++]);
    MatrixMd b_i     = mapdouble(prhs[i++]);
    MatrixMd mstor   = mapdouble(prhs[i++]);
    MatrixMd dm1     = mapdouble(prhs[i++]);
    MatrixMd dm2     = mapdouble(prhs[i++]);
    MatrixMd dn      = mapdouble(prhs[i++]);
    MatrixMd dpsi_i  = mapdouble(prhs[i++]);
    MatrixMd dpsi_ij = mapdouble(prhs[i++]);
    MatrixMd b_ij0   = mapdouble(prhs[i++]);
    MatrixMd db_ij0  = mapdouble(prhs[i++]);
    int dorec        = mapdouble(prhs[i++]).cast<int>()(0);

    MatrixMi w      = mapint32(prhs[i++]);

    #pragma omp parallel for num_threads(NTHREAD)
    for(int c=0; c<ncliques; c++) {
        int i = pairs(c, 0);
        int j = pairs(c, 1);
        for(int yi=0; yi<nvals; yi++) {
            for(int yj=0; yj<nvals; yj++) {
                int index = yi + yj*nvals;
                //b_ij0(index,c) = b_ij0(index,c)*psi_i(yi,i)*psi_i(yj,j)*n(yi,i)*n(yj,j)/m1(yi,c)/m2(yj,c);
                dpsi_i(yi, i) = dpsi_i(yi, i) + db_ij0(index, c)*b_ij0(index, c)/psi_i(yi, i);
                dpsi_i(yj, j) = dpsi_i(yj, j) + db_ij0(index, c)*b_ij0(index, c)/psi_i(yj, j);
                dn(yi, i)     = dn(yi, i)     + db_ij0(index, c)*b_ij0(index, c)/n(yi, i);
                dn(yj, j)     = dn(yj, j)     + db_ij0(index, c)*b_ij0(index, c)/n(yj, j);
                dm1(yi, c)    = dm1(yi, c)    - db_ij0(index, c)*b_ij0(index, c)/m1(yi, c);
                dm2(yj, c)    = dm2(yj, c)    - db_ij0(index, c)*b_ij0(index, c)/m2(yj, c);
            }
        }
    }

    #pragma omp parallel for num_threads(NTHREAD)
    for(int i=0; i<b_i.cols(); i++) {
        for(int yi=0; yi<nvals; yi++) {
            for(int k=0; k<N1.cols(); k++) {
                int d = N1(i, k);
                if(d==-2) continue;
                //n(yi, i) *= m1(yi, d);
                dm1(yi,d) += rho * dn(yi,i)*n(yi,i)/m1(yi,d);
            }
            for(int k=0; k<N2.cols(); k++) {
                int d = N2(i, k);
                if(d==-2) continue;
                //n(yi, i) *= m2(yi, d);
                dm2(yi,d) += rho * dn(yi,i)*n(yi,i)/m2(yi,d);
            }
        }
    }

    //tree_ncliques = sum(double(model.tree2clique>0),1);
    //ntree = length(tree_ncliques);
    int ntree = tree2clique.cols();
    MatrixXi tree_ncliques = MatrixXi::Zero(ntree,1);
    for(int tree=0; tree<tree2clique.cols(); tree++)
        for(i=0; i<tree2clique.rows(); i++)
            if(tree2clique(i,tree) != -1)
                tree_ncliques(tree)++;

    int reps;
    double conv;
    for(reps=0; reps<maxiter; reps++) {
        // must re-order blocks
        //for(int block=0; block<treeschedule.cols(); block++){
        for(int block=treeschedule.cols()-1; block>=0; block--) {
            // need not re-order trees, but why not...
            // helps if someone specifies not parallel trees
            // for(int treenum=treeschedule.rows()-1; treenum>=0; treenum--){
            #pragma omp parallel for schedule(dynamic) num_threads(NTHREAD)
            for(int treenum=0; treenum<treeschedule.rows(); treenum++) {
                MatrixXd S (nvals,nvals);
                MatrixXd m0(nvals,1);

                int tree = treeschedule(treenum,block);
                if(tree==-1)
                    continue;
                int ncliques = tree_ncliques(tree);

                for(int c0=2*ncliques-1; c0>=0; c0--) {
                    int c, mode;
                    if(c0<ncliques) {
                        c    = tree2clique(c0,tree);
                        mode = 1;
                    } else {
                        c    = tree2clique(ncliques - 1 - (c0-ncliques),tree);
                        mode = 2;
                    }

                    //cout << "BW c " << c << " mode " << mode << endl;

                    int i = pairs(c,0);
                    int j = pairs(c,1);

                    if( mode==1 ) {

                        for(int yi=0; yi<nvals; yi++)
                            n(yi, i) = 1;
                        for(int k=0; k<N1.cols(); k++) {
                            int d = N1(i, k);
                            if(d==-2) continue;
                            for(int yi=0; yi<nvals; yi++)
                                n(yi, i) *= m1(yi, d);
                        }
                        for(int k=0; k<N2.cols(); k++) {
                            int d = N2(i, k);
                            if(d==-2) continue;
                            for(int yi=0; yi<nvals; yi++)
                                n(yi, i) *= m2(yi, d);
                        }
                        // n.col(i) = n.col(i).array().pow(rho);
                        if(rho==1) {
                            //nothing
                        } else if(rho==.5) {
                            n.col(i) = n.col(i).array().sqrt();
                        } else
                            n.col(i) = n.col(i).array().pow(rho);

                        // compute m(y_j)
                        for(int yj=0; yj<nvals; yj++) {
                            m0(yj) = 0;
                            for(int yi=0; yi<nvals; yi++) {
                                int index = yi + yj*nvals;
                                S(yi,yj)  = psi_ij(index, c)*psi_i(yi, i)*n(yi, i)/m1(yi, c);
                                m0(yj)   += S(yi,yj);
                            }
                        }

                        double k = S.sum();

                        MatrixXd dm0 = dm2.col(c)/k;
                        dm0.array() -= (dm2.col(c).array()*m0.array()).sum()/(k*k);

                        MatrixXd dn  = 0*n.col(i);

                        for(int yj=0; yj<nvals; yj++) {
                            for(int yi=0; yi<nvals; yi++) {
                                int index = yi + yj*nvals;
                                dpsi_ij(index,c) += dm0(yj)*S(yi,yj)/psi_ij(index,c);
                                dpsi_i(yi,i)     += dm0(yj)*S(yi,yj)/psi_i(yi,i);
                                dn(yi)           += dm0(yj)*S(yi,yj)/n(yi,i);
                                dm1(yi,c)        -= dm0(yj)*S(yi,yj)/m1(yi,c);
                            }
                        }

                        for(int yi=0; yi<nvals; yi++) {
                            for(int k=0; k<N1.cols(); k++) {
                                int d = N1(i, k);
                                if(d==-2) continue;
                                dm1(yi,d) += rho*dn(yi)*n(yi,i)/m1(yi,d);
                            }
                            for(int k=0; k<N2.cols(); k++) {
                                int d = N2(i, k);
                                if(d==-2) continue;
                                dm2(yi,d) += rho*dn(yi)*n(yi,i)/m2(yi,d);
                            }
                        }
                        dm2.col(c) *= 0.0;
                        if( dorec )
                            for(int yj=nvals-1; yj>=0; yj--) {
                                w(tree)=w(tree)-1;
                                m2(yj,c)=mstor(w(tree),tree);
                            }
                    } else if( mode==2 ) {
                        for( int yj=0; yj<nvals; yj++)
                            n(yj, j) = 1;
                        for(int k=0; k<N1.cols(); k++) {
                            int d = N1(j, k);
                            if(d==-2) continue;
                            for( int yj=0; yj<nvals; yj++)
                                n(yj, j) *= m1(yj, d);
                        }
                        for(int k=0; k<N2.cols(); k++) {
                            int d = N2(j, k);
                            if(d==-2) continue;
                            for( int yj=0; yj<nvals; yj++)
                                n(yj, j) *= m2(yj, d);
                        }

//                n.col(j) = n.col(j).array().pow(rho);
                        if(rho==1) {
                            //nothing
                        } else if(rho==.5) {
                            n.col(j) = n.col(j).array().sqrt();
                        } else
                            n.col(j) = n.col(j).array().pow(rho);


                        for(int yi=0; yi<nvals; yi++) {
                            m0(yi) = 0;
                            for(int yj=0; yj<nvals; yj++) {
                                int index = yi + yj*nvals;
                                S(yi,yj)  = psi_ij(index, c)*psi_i(yj, j)*n(yj, j)/m2(yj, c);
                                m0(yi)   += S(yi,yj);
                            }
                        }
                        double k = S.sum();

                        MatrixXd dm0 = dm1.col(c)/k;
                        dm0.array() -= (dm1.col(c).array()*m0.array()).sum()/(k*k);
                        MatrixXd dn  = 0*n.col(i);

                        for(int yj=0; yj<nvals; yj++) {
                            for(int yi=0; yi<nvals; yi++) {
                                int index = yi + yj*nvals;
                                dpsi_ij(index,c) += dm0(yi)*S(yi,yj)/psi_ij(index,c);
                                dpsi_i(yj,j)     += dm0(yi)*S(yi,yj)/psi_i(yj,j);
                                dn(yj)           += dm0(yi)*S(yi,yj)/n(yj,j);
                                dm2(yj,c)        -= dm0(yi)*S(yi,yj)/m2(yj,c);
                            }
                        }

                        for(int yj=0; yj<nvals; yj++) {
                            for(int k=0; k<N1.cols(); k++) {
                                int d = N1(j, k);
                                if(d==-2) continue;
                                dm1(yj, d) += rho*dn(yj)*n(yj, j)/m1(yj, d);
                            }
                            for(int k=0; k<N2.cols(); k++) {
                                int d = N2(j, k);
                                if(d==-2) continue;
                                dm2(yj, d) += rho*dn(yj)*n(yj, j)/m2(yj, d);
                            }
                        }

                        dm1.col(c) *= 0.0;
                        if( dorec )
                            for(int yi=nvals-1; yi>=0; yi--) {
                                w(tree)=w(tree)-1;
                                m1(yi,c)=mstor(w(tree),tree);
                            }
                    }
                }
            }
        }
    }
}
Beispiel #16
0
void test_n_voronoi(void)
{
    std::vector<Vector> pos;
    real L = 32;

    Vector a, b, c, d;
    a.x = 1.; a.y = 2.;
    b.x = 2.; b.y = 2.;
    c.x = 2.; c.y = 0.;
    d.x = 4.; d.y = 4.;

    pos = {a, b, c, d};

    MATRIX n(4, 4);
    n.Zero();
    calculate_n_voronoi(n, pos, L);

    MATRIX m(4, 4);
    /*
    0 1 1 1
    1 0 1 1
    1 1 0 1
    1 1 1 0
    */
    m.Set(0, 0, 0); m.Set(0, 1, 1); m.Set(0, 2, 1); m.Set(0, 3, 1);
    m.Set(1, 0, 1); m.Set(1, 1, 0); m.Set(1, 2, 1); m.Set(1, 3, 1);
    m.Set(2, 0, 1); m.Set(2, 1, 1); m.Set(2, 2, 0); m.Set(2, 3, 1);
    m.Set(3, 0, 1); m.Set(3, 1, 1); m.Set(3, 2, 1); m.Set(3, 3, 0);

    assert(n == m);

    /*9 particles Voronoi*/

    MATRIX n2(9, 9);
    n2.Zero();

    Vector v1(10,30), v2(20,30), v3(30,30), v4(10,20), v5(20,20), v6(30,20), v7(10,10), v8(20,10), v9(30,10);
    pos = {v1, v2, v3, v4, v5, v6, v7, v8, v9};
    calculate_n_voronoi(n2, pos, L);

    MATRIX m2(9, 9);
    /*
    0 1 1 1 0 0 1 0 0
    1 0 1 0 1 0 0 1 0
    1 1 0 0 0 1 0 0 1
    1 0 0 0 1 1 1 0 0
    0 1 0 1 0 1 0 1 0
    0 0 1 1 1 0 0 0 1
    1 0 0 1 0 0 0 1 1
    0 1 0 0 1 0 1 0 1
    0 0 1 0 0 1 1 1 0
    */
    m2.Set(0, 0, 0);m2.Set(0, 1, 1);m2.Set(0, 2, 1);m2.Set(0, 3, 1);m2.Set(0, 4, 0);m2.Set(0, 5, 0);m2.Set(0, 6, 1);m2.Set(0, 7, 0);m2.Set(0, 8, 0);
    m2.Set(1, 0, 1);m2.Set(1, 1, 0);m2.Set(1, 2, 1);m2.Set(1, 3, 0);m2.Set(1, 4, 1);m2.Set(1, 5, 0);m2.Set(1, 6, 0);m2.Set(1, 7, 1);m2.Set(1, 8, 0);
    m2.Set(2, 0, 1);m2.Set(2, 1, 1);m2.Set(2, 2, 0);m2.Set(2, 3, 0);m2.Set(2, 4, 0);m2.Set(2, 5, 1);m2.Set(2, 6, 0);m2.Set(2, 7, 0);m2.Set(2, 8, 1);
    m2.Set(3, 0, 1);m2.Set(3, 1, 0);m2.Set(3, 2, 0);m2.Set(3, 3, 0);m2.Set(3, 4, 1);m2.Set(3, 5, 1);m2.Set(3, 6, 1);m2.Set(3, 7, 0);m2.Set(3, 8, 0);
    m2.Set(4, 0, 0);m2.Set(4, 1, 1);m2.Set(4, 2, 0);m2.Set(4, 3, 1);m2.Set(4, 4, 0);m2.Set(4, 5, 1);m2.Set(4, 6, 0);m2.Set(4, 7, 1);m2.Set(4, 8, 0);
    m2.Set(5, 0, 0);m2.Set(5, 1, 0);m2.Set(5, 2, 1);m2.Set(5, 3, 1);m2.Set(5, 4, 1);m2.Set(5, 5, 0);m2.Set(5, 6, 0);m2.Set(5, 7, 0);m2.Set(5, 8, 1);
    m2.Set(6, 0, 1);m2.Set(6, 1, 0);m2.Set(6, 2, 0);m2.Set(6, 3, 1);m2.Set(6, 4, 0);m2.Set(6, 5, 0);m2.Set(6, 6, 0);m2.Set(6, 7, 1);m2.Set(6, 8, 1);
    m2.Set(7, 0, 0);m2.Set(7, 1, 1);m2.Set(7, 2, 0);m2.Set(7, 3, 0);m2.Set(7, 4, 1);m2.Set(7, 5, 0);m2.Set(7, 6, 1);m2.Set(7, 7, 0);m2.Set(7, 8, 1);
    m2.Set(8, 0, 0);m2.Set(8, 1, 0);m2.Set(8, 2, 1);m2.Set(8, 3, 0);m2.Set(8, 4, 0);m2.Set(8, 5, 1);m2.Set(8, 6, 1);m2.Set(8, 7, 1);m2.Set(8, 8, 0);
    assert(n2 == m2);
}
  void RebalanceTransferFactory<Scalar, LocalOrdinal, GlobalOrdinal, Node, LocalMatOps>::Build(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Build", coarseLevel);

    const ParameterList& pL = GetParameterList();

    int implicit   = !pL.get<bool>("repartition: rebalance P and R");
    int writeStart = pL.get<int> ("write start");
    int writeEnd   = pL.get<int> ("write end");

    if (writeStart == 0 && fineLevel.GetLevelID() == 0 && writeStart <= writeEnd && IsAvailable(fineLevel, "Coordinates")) {
      std::string fileName = "coordinates_level_0.m";
      RCP<MultiVector> fineCoords = fineLevel.Get< RCP<MultiVector> >("Coordinates");
      if (fineCoords != Teuchos::null)
        Utils::Write(fileName, *fineCoords);
    }

    RCP<const Import> importer = Get<RCP<const Import> >(coarseLevel, "Importer");
    if (implicit) {
      // Save the importer, we'll need it for solve
      coarseLevel.Set("Importer", importer, NoFactory::get());
    }

    RCP<ParameterList> params = rcp(new ParameterList());;
    params->set("printLoadBalancingInfo", true);
    params->set("printCommInfo",          true);

    std::string transferType = pL.get<std::string>("type");
    if (transferType == "Interpolation") {
      RCP<Matrix> originalP = Get< RCP<Matrix> >(coarseLevel, "P");

      {
        // This line must be after the Get call
        SubFactoryMonitor m1(*this, "Rebalancing prolongator", coarseLevel);

        if (implicit || importer.is_null()) {
          GetOStream(Runtime0) << "Using original prolongator" << std::endl;
          Set(coarseLevel, "P", originalP);

        } else {
          // P is the transfer operator from the coarse grid to the fine grid.
          // P must transfer the data from the newly reordered coarse A to the
          // (unchanged) fine A.  This means that the domain map (coarse) of P
          // must be changed according to the new partition. The range map
          // (fine) is kept unchanged.
          //
          // The domain map of P must match the range map of R.  See also note
          // below about domain/range map of R and its implications for P.
          //
          // To change the domain map of P, P needs to be fillCompleted again
          // with the new domain map.  To achieve this, P is copied into a new
          // matrix that is not fill-completed.  The doImport() operation is
          // just used here to make a copy of P: the importer is trivial and
          // there is no data movement involved.  The reordering actually
          // happens during the fillComplete() with domainMap == importer->getTargetMap().
          RCP<Matrix> rebalancedP = originalP;
          RCP<const CrsMatrixWrap> crsOp = rcp_dynamic_cast<const CrsMatrixWrap>(originalP);
          TEUCHOS_TEST_FOR_EXCEPTION(crsOp == Teuchos::null, Exceptions::BadCast, "Cast from Xpetra::Matrix to Xpetra::CrsMatrixWrap failed");

          RCP<CrsMatrix> rebalancedP2 = crsOp->getCrsMatrix();
          TEUCHOS_TEST_FOR_EXCEPTION(rebalancedP2 == Teuchos::null, std::runtime_error, "Xpetra::CrsMatrixWrap doesn't have a CrsMatrix");

          {
            SubFactoryMonitor subM(*this, "Rebalancing prolongator -- fast map replacement", coarseLevel);

            RCP<const Import> newImporter = ImportFactory::Build(importer->getTargetMap(), rebalancedP->getColMap());
            rebalancedP2->replaceDomainMapAndImporter(importer->getTargetMap(), newImporter);
          }

          ///////////////////////// EXPERIMENTAL
          // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
          // That is probably something for an external permutation factory
          //   if (originalP->IsView("stridedMaps"))
          //     rebalancedP->CreateView("stridedMaps", originalP);
          ///////////////////////// EXPERIMENTAL

          Set(coarseLevel, "P", rebalancedP);

          if (IsPrint(Statistics1))
            GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedP, "P (rebalanced)", params);
        }
      }

      if (importer.is_null()) {
        if (IsAvailable(coarseLevel, "Nullspace"))
          Set(coarseLevel, "Nullspace", Get<RCP<MultiVector> >(coarseLevel, "Nullspace"));

        if (pL.isParameter("Coordinates") && pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null)
          if (IsAvailable(coarseLevel, "Coordinates"))
            Set(coarseLevel, "Coordinates", Get< RCP<MultiVector> >(coarseLevel, "Coordinates"));

        return;
      }

      if (pL.isParameter("Coordinates") &&
          pL.get< RCP<const FactoryBase> >("Coordinates") != Teuchos::null &&
          IsAvailable(coarseLevel, "Coordinates")) {
        RCP<MultiVector> coords = Get<RCP<MultiVector> >(coarseLevel, "Coordinates");

        // This line must be after the Get call
        SubFactoryMonitor subM(*this, "Rebalancing coordinates", coarseLevel);

        LO nodeNumElts = coords->getMap()->getNodeNumElements();

        // If a process has no matrix rows, then we can't calculate blocksize using the formula below.
        LO myBlkSize = 0, blkSize = 0;
        if (nodeNumElts > 0)
          myBlkSize = importer->getSourceMap()->getNodeNumElements() / nodeNumElts;
        maxAll(coords->getMap()->getComm(), myBlkSize, blkSize);

        RCP<const Import> coordImporter;
        if (blkSize == 1) {
          coordImporter = importer;

        } else {
          // NOTE: there is an implicit assumption here: we assume that dof any node are enumerated consequently
          // Proper fix would require using decomposition similar to how we construct importer in the
          // RepartitionFactory
          RCP<const Map> origMap   = coords->getMap();
          GO             indexBase = origMap->getIndexBase();

          ArrayView<const GO> OEntries   = importer->getTargetMap()->getNodeElementList();
          LO                  numEntries = OEntries.size()/blkSize;
          ArrayRCP<GO> Entries(numEntries);
          for (LO i = 0; i < numEntries; i++)
            Entries[i] = (OEntries[i*blkSize]-indexBase)/blkSize + indexBase;

          RCP<const Map> targetMap = MapFactory::Build(origMap->lib(), origMap->getGlobalNumElements(), Entries(), indexBase, origMap->getComm());
          coordImporter = ImportFactory::Build(origMap, targetMap);
        }

        RCP<MultiVector> permutedCoords  = MultiVectorFactory::Build(coordImporter->getTargetMap(), coords->getNumVectors());
        permutedCoords->doImport(*coords, *coordImporter, Xpetra::INSERT);

        if (pL.get<bool>("useSubcomm") == true)
          permutedCoords->replaceMap(permutedCoords->getMap()->removeEmptyProcesses());

        Set(coarseLevel, "Coordinates", permutedCoords);

        std::string fileName = "rebalanced_coordinates_level_" + toString(coarseLevel.GetLevelID()) + ".m";
        if (writeStart <= coarseLevel.GetLevelID() && coarseLevel.GetLevelID() <= writeEnd && permutedCoords->getMap() != Teuchos::null)
          Utils::Write(fileName, *permutedCoords);
      }

      if (IsAvailable(coarseLevel, "Nullspace")) {
        RCP<MultiVector> nullspace = Get< RCP<MultiVector> >(coarseLevel, "Nullspace");

        // This line must be after the Get call
        SubFactoryMonitor subM(*this, "Rebalancing nullspace", coarseLevel);

        RCP<MultiVector> permutedNullspace = MultiVectorFactory::Build(importer->getTargetMap(), nullspace->getNumVectors());
        permutedNullspace->doImport(*nullspace, *importer, Xpetra::INSERT);

        if (pL.get<bool>("useSubcomm") == true)
          permutedNullspace->replaceMap(permutedNullspace->getMap()->removeEmptyProcesses());

        Set(coarseLevel, "Nullspace", permutedNullspace);
      }

    } else {
      if (pL.get<bool>("transpose: use implicit") == false) {
        RCP<Matrix> originalR = Get< RCP<Matrix> >(coarseLevel, "R");

        SubFactoryMonitor m2(*this, "Rebalancing restriction", coarseLevel);

        if (implicit || importer.is_null()) {
          GetOStream(Runtime0) << "Using original restrictor" << std::endl;
          Set(coarseLevel, "R", originalR);

        } else {
          RCP<Matrix> rebalancedR;
          {
            SubFactoryMonitor subM(*this, "Rebalancing restriction -- fusedImport", coarseLevel);

            RCP<Map> dummy;         // meaning: use originalR's domain map.
            rebalancedR = MatrixFactory::Build(originalR, *importer, dummy, importer->getTargetMap());
          }
          Set(coarseLevel, "R", rebalancedR);

          ///////////////////////// EXPERIMENTAL
          // TODO FIXME somehow we have to transfer the striding information of the permuted domain/range maps.
          // That is probably something for an external permutation factory
          // if (originalR->IsView("stridedMaps"))
          //   rebalancedR->CreateView("stridedMaps", originalR);
          ///////////////////////// EXPERIMENTAL

          if (IsPrint(Statistics1))
            GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*rebalancedR, "R (rebalanced)", params);
        }
      }
    }
  }
template<typename SparseMatrixType> void sparse_basic(const SparseMatrixType& ref)
{
  const int rows = ref.rows();
  const int cols = ref.cols();
  typedef typename SparseMatrixType::Scalar Scalar;
  enum { Flags = SparseMatrixType::Flags };
  
  double density = std::max(8./(rows*cols), 0.01);
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
  typedef Matrix<Scalar,Dynamic,1> DenseVector;
  Scalar eps = 1e-6;

  SparseMatrixType m(rows, cols);
  DenseMatrix refMat = DenseMatrix::Zero(rows, cols);
  DenseVector vec1 = DenseVector::Random(rows);
  Scalar s1 = ei_random<Scalar>();

  std::vector<Vector2i> zeroCoords;
  std::vector<Vector2i> nonzeroCoords;
  initSparse<Scalar>(density, refMat, m, 0, &zeroCoords, &nonzeroCoords);
  
  if (zeroCoords.size()==0 || nonzeroCoords.size()==0)
    return;

  // test coeff and coeffRef
  for (int i=0; i<(int)zeroCoords.size(); ++i)
  {
    VERIFY_IS_MUCH_SMALLER_THAN( m.coeff(zeroCoords[i].x(),zeroCoords[i].y()), eps );
    if(ei_is_same_type<SparseMatrixType,SparseMatrix<Scalar,Flags> >::ret)
      VERIFY_RAISES_ASSERT( m.coeffRef(zeroCoords[0].x(),zeroCoords[0].y()) = 5 );
  }
  VERIFY_IS_APPROX(m, refMat);

  m.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);
  refMat.coeffRef(nonzeroCoords[0].x(), nonzeroCoords[0].y()) = Scalar(5);

  VERIFY_IS_APPROX(m, refMat);
  /*
  // test InnerIterators and Block expressions
  for (int t=0; t<10; ++t)
  {
    int j = ei_random<int>(0,cols-1);
    int i = ei_random<int>(0,rows-1);
    int w = ei_random<int>(1,cols-j-1);
    int h = ei_random<int>(1,rows-i-1);

//     VERIFY_IS_APPROX(m.block(i,j,h,w), refMat.block(i,j,h,w));
    for(int c=0; c<w; c++)
    {
      VERIFY_IS_APPROX(m.block(i,j,h,w).col(c), refMat.block(i,j,h,w).col(c));
      for(int r=0; r<h; r++)
      {
//         VERIFY_IS_APPROX(m.block(i,j,h,w).col(c).coeff(r), refMat.block(i,j,h,w).col(c).coeff(r));
      }
    }
//     for(int r=0; r<h; r++)
//     {
//       VERIFY_IS_APPROX(m.block(i,j,h,w).row(r), refMat.block(i,j,h,w).row(r));
//       for(int c=0; c<w; c++)
//       {
//         VERIFY_IS_APPROX(m.block(i,j,h,w).row(r).coeff(c), refMat.block(i,j,h,w).row(r).coeff(c));
//       }
//     }
  }

  for(int c=0; c<cols; c++)
  {
    VERIFY_IS_APPROX(m.col(c) + m.col(c), (m + m).col(c));
    VERIFY_IS_APPROX(m.col(c) + m.col(c), refMat.col(c) + refMat.col(c));
  }

  for(int r=0; r<rows; r++)
  {
    VERIFY_IS_APPROX(m.row(r) + m.row(r), (m + m).row(r));
    VERIFY_IS_APPROX(m.row(r) + m.row(r), refMat.row(r) + refMat.row(r));
  }
  */

  // test SparseSetters
  // coherent setter
  // TODO extend the MatrixSetter
//   {
//     m.setZero();
//     VERIFY_IS_NOT_APPROX(m, refMat);
//     SparseSetter<SparseMatrixType, FullyCoherentAccessPattern> w(m);
//     for (int i=0; i<nonzeroCoords.size(); ++i)
//     {
//       w->coeffRef(nonzeroCoords[i].x(),nonzeroCoords[i].y()) = refMat.coeff(nonzeroCoords[i].x(),nonzeroCoords[i].y());
//     }
//   }
//   VERIFY_IS_APPROX(m, refMat);

  // random setter
//   {
//     m.setZero();
//     VERIFY_IS_NOT_APPROX(m, refMat);
//     SparseSetter<SparseMatrixType, RandomAccessPattern> w(m);
//     std::vector<Vector2i> remaining = nonzeroCoords;
//     while(!remaining.empty())
//     {
//       int i = ei_random<int>(0,remaining.size()-1);
//       w->coeffRef(remaining[i].x(),remaining[i].y()) = refMat.coeff(remaining[i].x(),remaining[i].y());
//       remaining[i] = remaining.back();
//       remaining.pop_back();
//     }
//   }
//   VERIFY_IS_APPROX(m, refMat);

    VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdMapTraits> >(m,refMat,nonzeroCoords) ));
    #ifdef EIGEN_UNORDERED_MAP_SUPPORT
    VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
    #endif
    #ifdef _DENSE_HASH_MAP_H_
    VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
    #endif
    #ifdef _SPARSE_HASH_MAP_H_
    VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));
    #endif

    // test fillrand
    {
      DenseMatrix m1(rows,cols);
      m1.setZero();
      SparseMatrixType m2(rows,cols);
      m2.startFill();
      for (int j=0; j<cols; ++j)
      {
        for (int k=0; k<rows/2; ++k)
        {
          int i = ei_random<int>(0,rows-1);
          if (m1.coeff(i,j)==Scalar(0))
            m2.fillrand(i,j) = m1(i,j) = ei_random<Scalar>();
        }
      }
      m2.endFill();
      VERIFY_IS_APPROX(m2,m1);
    }
  
  // test RandomSetter
  /*{
    SparseMatrixType m1(rows,cols), m2(rows,cols);
    DenseMatrix refM1 = DenseMatrix::Zero(rows, rows);
    initSparse<Scalar>(density, refM1, m1);
    {
      Eigen::RandomSetter<SparseMatrixType > setter(m2);
      for (int j=0; j<m1.outerSize(); ++j)
        for (typename SparseMatrixType::InnerIterator i(m1,j); i; ++i)
          setter(i.index(), j) = i.value();
    }
    VERIFY_IS_APPROX(m1, m2);
  }*/
//   std::cerr << m.transpose() << "\n\n"  << refMat.transpose() << "\n\n";
//   VERIFY_IS_APPROX(m, refMat);

  // test basic computations
  {
    DenseMatrix refM1 = DenseMatrix::Zero(rows, rows);
    DenseMatrix refM2 = DenseMatrix::Zero(rows, rows);
    DenseMatrix refM3 = DenseMatrix::Zero(rows, rows);
    DenseMatrix refM4 = DenseMatrix::Zero(rows, rows);
    SparseMatrixType m1(rows, rows);
    SparseMatrixType m2(rows, rows);
    SparseMatrixType m3(rows, rows);
    SparseMatrixType m4(rows, rows);
    initSparse<Scalar>(density, refM1, m1);
    initSparse<Scalar>(density, refM2, m2);
    initSparse<Scalar>(density, refM3, m3);
    initSparse<Scalar>(density, refM4, m4);

    VERIFY_IS_APPROX(m1+m2, refM1+refM2);
    VERIFY_IS_APPROX(m1+m2+m3, refM1+refM2+refM3);
    VERIFY_IS_APPROX(m3.cwise()*(m1+m2), refM3.cwise()*(refM1+refM2));
    VERIFY_IS_APPROX(m1*s1-m2, refM1*s1-refM2);

    VERIFY_IS_APPROX(m1*=s1, refM1*=s1);
    VERIFY_IS_APPROX(m1/=s1, refM1/=s1);
    
    VERIFY_IS_APPROX(m1+=m2, refM1+=refM2);
    VERIFY_IS_APPROX(m1-=m2, refM1-=refM2);
    
    VERIFY_IS_APPROX(m1.col(0).eigen2_dot(refM2.row(0)), refM1.col(0).eigen2_dot(refM2.row(0)));
    
    refM4.setRandom();
    // sparse cwise* dense
    VERIFY_IS_APPROX(m3.cwise()*refM4, refM3.cwise()*refM4);
//     VERIFY_IS_APPROX(m3.cwise()/refM4, refM3.cwise()/refM4);
  }

  // test innerVector()
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
    SparseMatrixType m2(rows, rows);
    initSparse<Scalar>(density, refMat2, m2);
    int j0 = ei_random(0,rows-1);
    int j1 = ei_random(0,rows-1);
    VERIFY_IS_APPROX(m2.innerVector(j0), refMat2.col(j0));
    VERIFY_IS_APPROX(m2.innerVector(j0)+m2.innerVector(j1), refMat2.col(j0)+refMat2.col(j1));
    //m2.innerVector(j0) = 2*m2.innerVector(j1);
    //refMat2.col(j0) = 2*refMat2.col(j1);
    //VERIFY_IS_APPROX(m2, refMat2);
  }
  
  // test innerVectors()
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
    SparseMatrixType m2(rows, rows);
    initSparse<Scalar>(density, refMat2, m2);
    int j0 = ei_random(0,rows-2);
    int j1 = ei_random(0,rows-2);
    int n0 = ei_random<int>(1,rows-std::max(j0,j1));
    VERIFY_IS_APPROX(m2.innerVectors(j0,n0), refMat2.block(0,j0,rows,n0));
    VERIFY_IS_APPROX(m2.innerVectors(j0,n0)+m2.innerVectors(j1,n0),
                     refMat2.block(0,j0,rows,n0)+refMat2.block(0,j1,rows,n0));
    //m2.innerVectors(j0,n0) = m2.innerVectors(j0,n0) + m2.innerVectors(j1,n0);
    //refMat2.block(0,j0,rows,n0) = refMat2.block(0,j0,rows,n0) + refMat2.block(0,j1,rows,n0);
  }

  // test transpose
  {
    DenseMatrix refMat2 = DenseMatrix::Zero(rows, rows);
    SparseMatrixType m2(rows, rows);
    initSparse<Scalar>(density, refMat2, m2);
    VERIFY_IS_APPROX(m2.transpose().eval(), refMat2.transpose().eval());
    VERIFY_IS_APPROX(m2.transpose(), refMat2.transpose());
  }
  
  // test prune
  {
    SparseMatrixType m2(rows, rows);
    DenseMatrix refM2(rows, rows);
    refM2.setZero();
    int countFalseNonZero = 0;
    int countTrueNonZero = 0;
    m2.startFill();
    for (int j=0; j<m2.outerSize(); ++j)
      for (int i=0; i<m2.innerSize(); ++i)
      {
        float x = ei_random<float>(0,1);
        if (x<0.1)
        {
          // do nothing
        }
        else if (x<0.5)
        {
          countFalseNonZero++;
          m2.fill(i,j) = Scalar(0);
        }
        else
        {
          countTrueNonZero++;
          m2.fill(i,j) = refM2(i,j) = Scalar(1);
        }
      }
    m2.endFill();
    VERIFY(countFalseNonZero+countTrueNonZero == m2.nonZeros());
    VERIFY_IS_APPROX(m2, refM2);
    m2.prune(1);
    VERIFY(countTrueNonZero==m2.nonZeros());
    VERIFY_IS_APPROX(m2, refM2);
  }
}
int vandermonde_test(ScalarType epsilon)
{
    std::size_t VANDERMONDE_SIZE = 61;

    viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde1(VANDERMONDE_SIZE, VANDERMONDE_SIZE);
    viennacl::vandermonde_matrix<ScalarType> vcl_vandermonde2(VANDERMONDE_SIZE, VANDERMONDE_SIZE);

    viennacl::vector<ScalarType> vcl_input(VANDERMONDE_SIZE);
    viennacl::vector<ScalarType> vcl_result(VANDERMONDE_SIZE);

    std::vector<ScalarType> input_ref(VANDERMONDE_SIZE);
    std::vector<ScalarType> result_ref(VANDERMONDE_SIZE);

    dense_matrix<ScalarType> m1(vcl_vandermonde1.size1(), vcl_vandermonde1.size2());
    dense_matrix<ScalarType> m2(m1.size1(), m1.size2());

    for (std::size_t i = 0; i < m1.size1(); i++)
      for (std::size_t j = 0; j < m1.size2(); j++)
      {
        m1(i,j) = std::pow(ScalarType(1.0) + ScalarType(i)/ScalarType(1000.0), ScalarType(j));
        m2(i,j) = std::pow(ScalarType(1.0) - ScalarType(i)/ScalarType(2000.0), ScalarType(j));
      }

    for (std::size_t i = 0; i < input_ref.size(); i++)
      input_ref[i] = ScalarType(i);

    // Copy to ViennaCL
    viennacl::copy(m1, vcl_vandermonde1);
    viennacl::copy(m2, vcl_vandermonde2);
    viennacl::copy(input_ref, vcl_input);

    //
    // Matrix-Vector product:
    //
    vcl_result = viennacl::linalg::prod(vcl_vandermonde1, vcl_input);

    for (std::size_t i = 0; i < m1.size1(); i++)     //reference calculation
    {
      ScalarType entry = 0;
      for (std::size_t j = 0; j < m1.size2(); j++)
        entry += m1(i,j) * input_ref[j];

      result_ref[i] = entry;
    }

    viennacl::copy(vcl_result, input_ref);
    std::cout << "Matrix-Vector Product: " << diff_max(input_ref, result_ref);
    if (diff_max(input_ref, result_ref) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      for (std::size_t i=0; i<input_ref.size(); ++i)
        std::cout << "Should: " << result_ref[i] << ", is: " << input_ref[i] << std::endl;
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }


    //
    // Note: Matrix addition does not make sense for a Vandermonde matrix
    //


    //
    // Per-Element access:
    //
    vcl_vandermonde1(4) = static_cast<ScalarType>(1.0001);

    for (std::size_t j = 0; j < m1.size2(); j++)
    {
      m1(4, j) = std::pow(ScalarType(1.0001), ScalarType(j));
    }

    viennacl::copy(vcl_vandermonde1, m2);
    std::cout << "Element manipulation: " << diff(m1, m2);
    if (diff(m1, m2) < epsilon)
      std::cout << " [OK]" << std::endl;
    else
    {
      std::cout << " [FAILED]" << std::endl;
      return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}
Beispiel #20
0
int main()
{
    {
        typedef std::pair<MoveOnly, MoveOnly> V;
        typedef std::pair<const MoveOnly, MoveOnly> VC;
        typedef test_compare<std::less<MoveOnly> > C;
        typedef test_allocator<VC> A;
        typedef std::map<MoveOnly, MoveOnly, C, A> M;
        typedef std::move_iterator<V*> I;
        V a1[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m1(I(a1), I(a1+sizeof(a1)/sizeof(a1[0])), C(5), A(7));
        V a2[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m2(I(a2), I(a2+sizeof(a2)/sizeof(a2[0])), C(5), A(7));
        M m3(C(3), A(7));
        m3 = std::move(m1);
        assert(m3 == m2);
        assert(m3.get_allocator() == A(7));
        assert(m3.key_comp() == C(5));
        assert(m1.empty());
    }
    {
        typedef std::pair<MoveOnly, MoveOnly> V;
        typedef std::pair<const MoveOnly, MoveOnly> VC;
        typedef test_compare<std::less<MoveOnly> > C;
        typedef test_allocator<VC> A;
        typedef std::map<MoveOnly, MoveOnly, C, A> M;
        typedef std::move_iterator<V*> I;
        V a1[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m1(I(a1), I(a1+sizeof(a1)/sizeof(a1[0])), C(5), A(7));
        V a2[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m2(I(a2), I(a2+sizeof(a2)/sizeof(a2[0])), C(5), A(7));
        M m3(C(3), A(5));
        m3 = std::move(m1);
        assert(m3 == m2);
        assert(m3.get_allocator() == A(5));
        assert(m3.key_comp() == C(5));
        assert(m1.empty());
    }
    {
        typedef std::pair<MoveOnly, MoveOnly> V;
        typedef std::pair<const MoveOnly, MoveOnly> VC;
        typedef test_compare<std::less<MoveOnly> > C;
        typedef other_allocator<VC> A;
        typedef std::map<MoveOnly, MoveOnly, C, A> M;
        typedef std::move_iterator<V*> I;
        V a1[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m1(I(a1), I(a1+sizeof(a1)/sizeof(a1[0])), C(5), A(7));
        V a2[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m2(I(a2), I(a2+sizeof(a2)/sizeof(a2[0])), C(5), A(7));
        M m3(C(3), A(5));
        m3 = std::move(m1);
        assert(m3 == m2);
        assert(m3.get_allocator() == A(7));
        assert(m3.key_comp() == C(5));
        assert(m1.empty());
    }
    {
        typedef std::pair<MoveOnly, MoveOnly> V;
        typedef std::pair<const MoveOnly, MoveOnly> VC;
        typedef test_compare<std::less<MoveOnly> > C;
        typedef min_allocator<VC> A;
        typedef std::map<MoveOnly, MoveOnly, C, A> M;
        typedef std::move_iterator<V*> I;
        V a1[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m1(I(a1), I(a1+sizeof(a1)/sizeof(a1[0])), C(5), A());
        V a2[] =
        {
            V(1, 1),
            V(1, 2),
            V(1, 3),
            V(2, 1),
            V(2, 2),
            V(2, 3),
            V(3, 1),
            V(3, 2),
            V(3, 3)
        };
        M m2(I(a2), I(a2+sizeof(a2)/sizeof(a2[0])), C(5), A());
        M m3(C(3), A());
        m3 = std::move(m1);
        assert(m3 == m2);
        assert(m3.get_allocator() == A());
        assert(m3.key_comp() == C(5));
        assert(m1.empty());
    }
}
void
TopographyFileRenderer::PaintLabels(Canvas &canvas,
                                    const WindowProjection &projection,
                                    LabelBlock &label_block)
{
  if (file.IsEmpty())
    return;

  fixed map_scale = projection.GetMapScale();
  if (!file.IsVisible(map_scale) || !file.IsLabelVisible(map_scale))
    return;

  UpdateVisibleShapes(projection);

  if (visible_labels.empty())
    return;

  // TODO code: only draw inside screen!
  // this will save time with rendering pixmaps especially
  // we already do an outer visibility test, but may need a test
  // in screen coords

  canvas.Select(file.IsLabelImportant(map_scale) ?
                Fonts::map_label_important : Fonts::map_label);
  canvas.SetTextColor(Color(0x20, 0x20, 0x20));
  canvas.SetBackgroundTransparent();

  // get drawing info

  int iskip = file.GetSkipSteps(map_scale);

#ifdef ENABLE_OPENGL
  Matrix2D m1;
  m1.Translate(projection.GetScreenOrigin());
  m1.Rotate(projection.GetScreenAngle());
  m1.Scale(projection.GetScale());
#endif

  // Iterate over all shapes in the file
  for (auto it = visible_labels.begin(), end = visible_labels.end();
       it != end; ++it) {
    const XShape &shape = **it;

    if (!projection.GetScreenBounds().Overlaps(shape.get_bounds()))
      continue;

    // Skip shapes without a label
    const TCHAR *label = shape.get_label();
    if (label == NULL)
      continue;

    const unsigned short *lines = shape.get_lines();
    const unsigned short *end_lines = lines + shape.get_number_of_lines();
#ifdef ENABLE_OPENGL
    const ShapePoint *points = shape.get_points();

    Matrix2D m2(m1);
    m2.Translatex(shape.shape_translation(projection.GetGeoLocation()));
#else
    const GeoPoint *points = shape.get_points();
#endif

    for (; lines < end_lines; ++lines) {
      int minx = canvas.get_width();
      int miny = canvas.get_height();

#ifdef ENABLE_OPENGL
      const ShapePoint *end = points + *lines;
#else
      const GeoPoint *end = points + *lines;
#endif
      for (; points < end; points += iskip) {
#ifdef ENABLE_OPENGL
        RasterPoint pt = m2.Apply(*points);
#else
        RasterPoint pt = projection.GeoToScreen(*points);
#endif

        if (pt.x <= minx) {
          minx = pt.x;
          miny = pt.y;
        }
      }

      points = end;

      minx += 2;
      miny += 2;

      PixelSize tsize = canvas.CalcTextSize(label);
      PixelRect brect;
      brect.left = minx;
      brect.right = brect.left + tsize.cx;
      brect.top = miny;
      brect.bottom = brect.top + tsize.cy;

      if (!label_block.check(brect))
        continue;

      canvas.text(minx, miny, label);
    }
  }
}
Beispiel #22
0
zThread::~zThread()
{
 stop();
 join();
 zMutexLock m2(&zThread::ms_zThread);
 zMutexLock m1(&m_zThread);
#if defined(_WIN32) || defined(_WIN64)
 if(hThread != ((HANDLE) NULL))
 {
//  TerminateThread(hThread,0);
  CloseHandle(hThread);
  hThread=(HANDLE) 0;
#else
#if defined(__GNUG__) || defined(__linux__) || defined(__CYGWIN__)
 if(thread != pthread_t())
 {
//  pthread_cancel(thread);
  thread=pthread_t();
#endif
#endif 
  pid=0;
  stopFlag=false;
  suspendFlag=false;
  detachFlag=false;
  vector<zThread*>::iterator k=std::find(zThread::ms_list.begin(),zThread::ms_list.end(),this);
  if(k != zThread::ms_list.end()) { zThread::ms_list.erase(k); } 
 }
}

#if defined(_WIN32) || defined(_WIN64)
DWORD zThread::run_thread_item(LPDWORD attr)
{
 zThread *tr = reinterpret_cast<zThread*>(attr);
 if(tr == NULL) return 0;
 {
  zMutexLock m1(&tr->m_zThread);
  tr->pid=zThread::getTid();
  tr->suspendFlag = false;
  tr->alive=true;
 }
 tr->run();
 {
  zMutexLock m1(&tr->m_zThread);
  tr->pid=0;
 }
 if(tr->detachFlag)
 {
  {
   zMutexLock m1(&tr->m_zThread);
   tr->alive=false;
  }
  delete tr; ExitThread(0); return 0;
 }
 else
 {
  zMutexLock m1(&tr->m_zThread);
  tr->alive=false;
 }
 ExitThread(0);
 return 0; 
}
typedef Matrix<float,1,Dynamic> MatrixType;
typedef Map<MatrixType> MapType;
typedef Map<const MatrixType> MapTypeConst;   // a read-only map
const int n_dims = 5;
  
MatrixType m1(n_dims), m2(n_dims);
m1.setRandom();
m2.setRandom();
float *p = &m2(0);  // get the address storing the data for m2
MapType m2map(p,m2.size());   // m2map shares data with m2
MapTypeConst m2mapconst(p,m2.size());  // a read-only accessor for m2

cout << "m1: " << m1 << endl;
cout << "m2: " << m2 << endl;
cout << "Squared euclidean distance: " << (m1-m2).squaredNorm() << endl;
cout << "Squared euclidean distance, using map: " <<
  (m1-m2map).squaredNorm() << endl;
m2map(3) = 7;   // this will change m2, since they share the same array
cout << "Updated m2: " << m2 << endl;
cout << "m2 coefficient 2, constant accessor: " << m2mapconst(2) << endl;
/* m2mapconst(2) = 5; */   // this yields a compile-time error
Beispiel #24
0
void zThread::resume()
{
 zMutexLock m1(&m_zThread);   
#if defined(_WIN32) || defined(_WIN64)
 if(suspendFlag && !alive && (hThread != ((HANDLE) NULL)))
 {
  suspendFlag=false;
  ResumeThread(hThread);
#else
#if defined(__GNUG__) || defined(__linux__) || defined(__CYGWIN__)
 if(suspendFlag && !alive && (thread != pthread_t()))
 {
  suspendFlag=false;
  pthread_kill(thread, SIGCONT);
#endif
#endif
 }
} 

zThread* zThread::currentThread()
{
 size_t p=zThread::getTid();
 zMutexLock m2(&zThread::ms_zThread);
 for(size_t i=0; i < zThread::ms_list.size(); i++)
 {
  if(p == zThread::ms_list.at(i)->Pid()) return zThread::ms_list.at(i);
 }
 return NULL;
}

size_t zThread::getPid()
{
#if defined(_WIN32) || defined(_WIN64)
 return ((size_t) GetCurrentProcessId());
#else
#if defined(__GNUG__) || defined(__linux__) || defined(__CYGWIN__)
 return ((size_t) getpid());
#endif 
#endif 
};

size_t zThread::getTid()
{
#if defined(_WIN32) || defined(_WIN64)
 return ((size_t) GetCurrentThreadId());
#else
#if defined(__GNUG__) || defined(__linux__) || defined(__CYGWIN__)
 return ((size_t) pthread_self());
#endif 
#endif 
}

void zThread::sleep(time_t timeout)
{
 if(timeout > 0)
 {
#if defined(_WIN32) || defined(_WIN64)
  Sleep(timeout);
#else
#if defined(__GNUG__) || defined(__linux__) || defined(__CYGWIN__)
  struct timespec ts;
  ts.tv_sec = (timeout/1000);
  ts.tv_nsec = ((timeout%1000)*1000000);
  nanosleep(&ts, NULL);
#endif 
#endif 
 }
}

void zThread::stopAll()
{
 zMutexLock m2(&zThread::ms_zThread);
 for(size_t i=0; i < zThread::ms_list.size(); i++)
 { if(zThread::ms_list.at(i) != NULL) zThread::ms_list.at(i)->stop(); }
}
Beispiel #25
0
 void m3 (int x) { char *m = m2 (); if (m1 () > 0 && x > 0); }
Beispiel #26
0
vector<zThread*> zThread::getThreadList()
{
 zMutexLock m2(&zThread::ms_zThread); 
 return zThread::ms_list;
}
void tst_QContactDetail::classHierarchy()
{
    QContactDetail f1;
    QContactDetail f2;

    QVERIFY(f1.isEmpty());
    QVERIFY(f2.isEmpty());

    QContactPhoneNumber p1;
    p1.setNumber("123456");
    QVERIFY(!p1.isEmpty());
    QVERIFY(p1.definitionName() == QContactPhoneNumber::DefinitionName);

    QContactName m1;
    m1.setFirstName("Bob");
    QVERIFY(!m1.isEmpty());
    QVERIFY(m1.definitionName() == QContactName::DefinitionName);

    QVERIFY(p1 != m1);
    QVERIFY(f1 == f2);

    f1 = p1; // f1 is a phonenumber
    QVERIFY(f1 == p1);

    f1 = f1; // assign to itself
    QVERIFY(f1 == f1);
    QVERIFY(f1 == p1);
    QVERIFY(f1 != f2);
    QVERIFY(p1 != f2);

    p1 = p1; // assign leaf class to itself
    QVERIFY(p1 == p1);
    QVERIFY(f1 == p1);
    QVERIFY(p1 == f1);

    f2 = f1; // f2 = f1 = phonenumber
    QVERIFY(f1 == f2);
    QVERIFY(f2 == f1);
    QVERIFY(f2 == p1);
    QVERIFY(f1 == p1);

    f1 = m1; // f1 = name, f2 = phonenumber
    QVERIFY(f1 == m1);
    QVERIFY(f1 != f2);
    QVERIFY(f2 == p1);

    QContactPhoneNumber p2(f2); // p2 = f2 = phonenumber
    QVERIFY(p1 == p2);
    QVERIFY(p1 == f2);
    QCOMPARE(p2.number(), p1.number());
    QCOMPARE(p2.number(), QString("123456"));

    p2 = p1; // phone number to phone number
    QVERIFY(p1 == p2);
    QVERIFY(p1 == f2);
    QCOMPARE(p2.number(), p1.number());
    QCOMPARE(p2.number(), QString("123456"));

    p2.setNumber("5678"); // NOTE: implicitly shared, this has caused a detach so p1 != 2
    QVERIFY(p1 != p2);
    QVERIFY(p1 == f2);
    QVERIFY(p2 != f2);
    QCOMPARE(p2.number(), QString("5678"));
    QCOMPARE(p1.number(), QString("123456"));

    /* Bad assignment */
    p2 = m1; // assign a name to a phone number
    QVERIFY(p2 != m1);
    QVERIFY(p2.definitionName() == QContactPhoneNumber::DefinitionName); // should still be a phone number though
    QVERIFY(p2.isEmpty());

    /* copy ctor */
    QContactName m2(m1);
    QVERIFY(m2 == m1);

    /* another bad assignment */
    m2 = p2; // phone number to a name
    QVERIFY(m2 != m1);
    QVERIFY(m2.definitionName() == QContactName::DefinitionName);
    QVERIFY(m2.isEmpty());

    /* Check contexts are considered for equality */
    p2 = QContactPhoneNumber(); // new id / detach
    p2.setNumber(p1.number());
    p2.setContexts(QContactDetail::ContextHome);
    QVERIFY(p1 != p2);
    p2.removeValue(QContactDetail::FieldContext); // note, context is a value.
    QVERIFY(p1 == p2); // different ids but same values should be equal

    /* Copy ctor from valid type */
    QContactDetail f3(p2);
    QVERIFY(f3 == p2);
    QVERIFY(f3.definitionName() == QContactPhoneNumber::DefinitionName);

    /* Copy ctor from invalid type */
    QContactPhoneNumber p3(m1);
    QVERIFY(p3 != m1);
    QVERIFY(p3.definitionName() == QContactPhoneNumber::DefinitionName);
    QVERIFY(p3.isEmpty());

    /* Copy ctore from invalid type, through base type */
    f3 = m1;
    QContactPhoneNumber p4(f3);
    QVERIFY(p4 != f3);
    QVERIFY(p4.definitionName() == QContactPhoneNumber::DefinitionName);
    QVERIFY(p4.isEmpty());

    /* Try a reference */
    p1.setNumber("123456");
    QContactDetail& ref = p1;
    QVERIFY(p1.number() == "123456");
    QVERIFY(p1.value(QContactPhoneNumber::FieldNumber) == "123456");
    QVERIFY(ref.value(QContactPhoneNumber::FieldNumber) == "123456");
    QVERIFY(p1 == ref);
    QVERIFY(ref == p1);

    /* Try changing the original */
    p1.setNumber("56789");
    QVERIFY(p1.number() == "56789");
    QVERIFY(p1.value(QContactPhoneNumber::FieldNumber) == "56789");
    QVERIFY(ref.value(QContactPhoneNumber::FieldNumber) == "56789");
    QVERIFY(p1 == ref);
    QVERIFY(ref == p1);

    /* Try changing the reference */
    ref.setValue(QContactPhoneNumber::FieldNumber, "654321");
    QVERIFY(p1.number() == "654321");
    QVERIFY(p1.value(QContactPhoneNumber::FieldNumber) == "654321");
    QVERIFY(ref.value(QContactPhoneNumber::FieldNumber) == "654321");
    QVERIFY(p1 == ref);
    QVERIFY(ref == p1);

    /* Random other test */
    NonMacroCustomDetail md;
    QVERIFY(md.definitionName() == "malicious");
    QVERIFY(md.setValue("key", "value"));
    QVERIFY(!md.isEmpty());
    md.doAssign(md); // self assignment
    QVERIFY(!md.isEmpty());
    QVERIFY(md.value("key") == "value");

    QContactDetail mdv;
    mdv = md;
    QVERIFY(mdv.definitionName() == "malicious");
    QVERIFY(mdv.value("key") == "value");

    md = mdv;
    QVERIFY(md.definitionName() == "malicious");
    QVERIFY(md.value("key") == "value");

    NonMacroCustomDetail2 md2;
    QVERIFY(md2.setValue("key", "value"));
    QVERIFY(md2.definitionName() == "malicious");
    QVERIFY(md2.value("key") == "value");
    md2.doAssign(md);
    QVERIFY(md2 == md);
    md2 = md;
    QVERIFY(md.definitionName() == "malicious");
    QVERIFY(md.value("key") == "value");

    // Self assignment
    md2.doAssign(md2);
    QVERIFY(md2.definitionName() == "malicious");
    QVERIFY(md2.value("key") == "value");

    md.doAssign(md2);
    QVERIFY(md == md2);

    // Assigning something else
    QContactPhoneNumber pn;
    pn.setNumber("12345");
    md2.doAssign(pn);
    QVERIFY(md2.isEmpty());
    QVERIFY(md2.definitionName() == "malicious");

    NonMacroCustomDetail mdb(pn);
    QVERIFY(mdb.isEmpty());
    QVERIFY(mdb.definitionName() == "malicious");

    NonMacroCustomDetail2 md2b(pn);
    QVERIFY(md2b.isEmpty());
    QVERIFY(md2b.definitionName() == "malicious");
}
Beispiel #28
0
size_t zThread::size()
{
 zMutexLock m2(&zThread::ms_zThread); 
 return zThread::ms_list.size();
}
  void SaPFactory_kokkos<Scalar,LocalOrdinal,GlobalOrdinal,Kokkos::Compat::KokkosDeviceWrapperNode<DeviceType>>::BuildP(Level& fineLevel, Level& coarseLevel) const {
    FactoryMonitor m(*this, "Prolongator smoothing", coarseLevel);

    // Add debugging information
    DeviceType::execution_space::print_configuration(GetOStream(Runtime1));

    typedef typename Teuchos::ScalarTraits<SC>::magnitudeType Magnitude;

    // Get default tentative prolongator factory
    // Getting it that way ensure that the same factory instance will be used for both SaPFactory_kokkos and NullspaceFactory.
    // -- Warning: Do not use directly initialPFact_. Use initialPFact instead everywhere!
    RCP<const FactoryBase> initialPFact = GetFactory("P");
    if (initialPFact == Teuchos::null) { initialPFact = coarseLevel.GetFactoryManager()->GetFactory("Ptent"); }

    // Level Get
    RCP<Matrix> A     = Get< RCP<Matrix> >(fineLevel, "A");
    RCP<Matrix> Ptent = coarseLevel.Get< RCP<Matrix> >("P", initialPFact.get());

    if(restrictionMode_) {
      SubFactoryMonitor m2(*this, "Transpose A", coarseLevel);

      A = Utilities_kokkos::Transpose(*A, true); // build transpose of A explicitly
    }

    //Build final prolongator
    RCP<Matrix> finalP; // output

    const ParameterList& pL = GetParameterList();
    SC dampingFactor      = as<SC>(pL.get<double>("sa: damping factor"));
    LO maxEigenIterations = as<LO>(pL.get<int>("sa: eigenvalue estimate num iterations"));
    bool estimateMaxEigen = pL.get<bool>("sa: calculate eigenvalue estimate");
    if (dampingFactor != Teuchos::ScalarTraits<SC>::zero()) {

      SC lambdaMax;
      {
        SubFactoryMonitor m2(*this, "Eigenvalue estimate", coarseLevel);
        lambdaMax = A->GetMaxEigenvalueEstimate();
        if (lambdaMax == -Teuchos::ScalarTraits<SC>::one() || estimateMaxEigen) {
          GetOStream(Statistics1) << "Calculating max eigenvalue estimate now (max iters = "<< maxEigenIterations << ")" << std::endl;
          Magnitude stopTol = 1e-4;
          lambdaMax = Utilities_kokkos::PowerMethod(*A, true, maxEigenIterations, stopTol);
          A->SetMaxEigenvalueEstimate(lambdaMax);
        } else {
          GetOStream(Statistics1) << "Using cached max eigenvalue estimate" << std::endl;
        }
        GetOStream(Statistics0) << "Prolongator damping factor = " << dampingFactor/lambdaMax << " (" << dampingFactor << " / " << lambdaMax << ")" << std::endl;
      }

      {
        SubFactoryMonitor m2(*this, "Fused (I-omega*D^{-1} A)*Ptent", coarseLevel);
        RCP<Vector> invDiag = Utilities_kokkos::GetMatrixDiagonalInverse(*A);

        SC omega = dampingFactor / lambdaMax;

        // finalP = Ptent + (I - \omega D^{-1}A) Ptent
        finalP = Xpetra::IteratorOps<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Jacobi(omega, *invDiag, *A, *Ptent, finalP, GetOStream(Statistics2), std::string("MueLu::SaP-") + toString(coarseLevel.GetLevelID()));
      }

    } else {
      finalP = Ptent;
    }

    // Level Set
    if (!restrictionMode_) {
      // prolongation factory is in prolongation mode
      Set(coarseLevel, "P", finalP);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        finalP->CreateView("stridedMaps", Ptent);

    } else {
      // prolongation factory is in restriction mode
      RCP<Matrix> R = Utilities_kokkos::Transpose(*finalP, true);
      Set(coarseLevel, "R", R);

      // NOTE: EXPERIMENTAL
      if (Ptent->IsView("stridedMaps"))
        R->CreateView("stridedMaps", Ptent, true);
    }

    if (IsPrint(Statistics1)) {
      RCP<ParameterList> params = rcp(new ParameterList());
      params->set("printLoadBalancingInfo", true);
      params->set("printCommInfo",          true);
      GetOStream(Statistics1) << PerfUtils::PrintMatrixInfo(*finalP, (!restrictionMode_ ? "P" : "R"), params);
    }

  } //Build()
Beispiel #30
0
template<typename SparseMatrixType> void sparse_product()
{
  typedef typename SparseMatrixType::Index Index;
  Index n = 100;
  const Index rows  = internal::random<int>(1,n);
  const Index cols  = internal::random<int>(1,n);
  const Index depth = internal::random<int>(1,n);
  typedef typename SparseMatrixType::Scalar Scalar;
  enum { Flags = SparseMatrixType::Flags };

  double density = (std::max)(8./(rows*cols), 0.1);
  typedef Matrix<Scalar,Dynamic,Dynamic> DenseMatrix;
  typedef Matrix<Scalar,Dynamic,1> DenseVector;

  Scalar s1 = internal::random<Scalar>();
  Scalar s2 = internal::random<Scalar>();

  // test matrix-matrix product
  {
    DenseMatrix refMat2  = DenseMatrix::Zero(rows, depth);
    DenseMatrix refMat2t = DenseMatrix::Zero(depth, rows);
    DenseMatrix refMat3  = DenseMatrix::Zero(depth, cols);
    DenseMatrix refMat3t = DenseMatrix::Zero(cols, depth);
    DenseMatrix refMat4  = DenseMatrix::Zero(rows, cols);
    DenseMatrix refMat4t = DenseMatrix::Zero(cols, rows);
    DenseMatrix refMat5  = DenseMatrix::Random(depth, cols);
    DenseMatrix refMat6  = DenseMatrix::Random(rows, rows);
    DenseMatrix dm4 = DenseMatrix::Zero(rows, rows);
//     DenseVector dv1 = DenseVector::Random(rows);
    SparseMatrixType m2 (rows, depth);
    SparseMatrixType m2t(depth, rows);
    SparseMatrixType m3 (depth, cols);
    SparseMatrixType m3t(cols, depth);
    SparseMatrixType m4 (rows, cols);
    SparseMatrixType m4t(cols, rows);
    SparseMatrixType m6(rows, rows);
    initSparse(density, refMat2,  m2);
    initSparse(density, refMat2t, m2t);
    initSparse(density, refMat3,  m3);
    initSparse(density, refMat3t, m3t);
    initSparse(density, refMat4,  m4);
    initSparse(density, refMat4t, m4t);
    initSparse(density, refMat6, m6);

//     int c = internal::random<int>(0,depth-1);

    // sparse * sparse
    VERIFY_IS_APPROX(m4=m2*m3, refMat4=refMat2*refMat3);
    VERIFY_IS_APPROX(m4=m2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3);
    VERIFY_IS_APPROX(m4=m2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());
    VERIFY_IS_APPROX(m4=m2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose());

    VERIFY_IS_APPROX(m4 = m2*m3/s1, refMat4 = refMat2*refMat3/s1);
    VERIFY_IS_APPROX(m4 = m2*m3*s1, refMat4 = refMat2*refMat3*s1);
    VERIFY_IS_APPROX(m4 = s2*m2*m3*s1, refMat4 = s2*refMat2*refMat3*s1);

    VERIFY_IS_APPROX(m4=(m2*m3).pruned(0), refMat4=refMat2*refMat3);
    VERIFY_IS_APPROX(m4=(m2t.transpose()*m3).pruned(0), refMat4=refMat2t.transpose()*refMat3);
    VERIFY_IS_APPROX(m4=(m2t.transpose()*m3t.transpose()).pruned(0), refMat4=refMat2t.transpose()*refMat3t.transpose());
    VERIFY_IS_APPROX(m4=(m2*m3t.transpose()).pruned(0), refMat4=refMat2*refMat3t.transpose());

    // test aliasing
    m4 = m2; refMat4 = refMat2;
    VERIFY_IS_APPROX(m4=m4*m3, refMat4=refMat4*refMat3);

    // sparse * dense
    VERIFY_IS_APPROX(dm4=m2*refMat3, refMat4=refMat2*refMat3);
    VERIFY_IS_APPROX(dm4=m2*refMat3t.transpose(), refMat4=refMat2*refMat3t.transpose());
    VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3, refMat4=refMat2t.transpose()*refMat3);
    VERIFY_IS_APPROX(dm4=m2t.transpose()*refMat3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());

    VERIFY_IS_APPROX(dm4=m2*(refMat3+refMat3), refMat4=refMat2*(refMat3+refMat3));
    VERIFY_IS_APPROX(dm4=m2t.transpose()*(refMat3+refMat5)*0.5, refMat4=refMat2t.transpose()*(refMat3+refMat5)*0.5);

    // dense * sparse
    VERIFY_IS_APPROX(dm4=refMat2*m3, refMat4=refMat2*refMat3);
    VERIFY_IS_APPROX(dm4=refMat2*m3t.transpose(), refMat4=refMat2*refMat3t.transpose());
    VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3, refMat4=refMat2t.transpose()*refMat3);
    VERIFY_IS_APPROX(dm4=refMat2t.transpose()*m3t.transpose(), refMat4=refMat2t.transpose()*refMat3t.transpose());

    // sparse * dense and dense * sparse outer product
    test_outer<SparseMatrixType,DenseMatrix>::run(m2,m4,refMat2,refMat4);

    VERIFY_IS_APPROX(m6=m6*m6, refMat6=refMat6*refMat6);
  }

  // test matrix - diagonal product
  {
    DenseMatrix refM2 = DenseMatrix::Zero(rows, cols);
    DenseMatrix refM3 = DenseMatrix::Zero(rows, cols);
    DenseMatrix d3 = DenseMatrix::Zero(rows, cols);
    DiagonalMatrix<Scalar,Dynamic> d1(DenseVector::Random(cols));
    DiagonalMatrix<Scalar,Dynamic> d2(DenseVector::Random(rows));
    SparseMatrixType m2(rows, cols);
    SparseMatrixType m3(rows, cols);
    initSparse<Scalar>(density, refM2, m2);
    initSparse<Scalar>(density, refM3, m3);
    VERIFY_IS_APPROX(m3=m2*d1, refM3=refM2*d1);
    VERIFY_IS_APPROX(m3=m2.transpose()*d2, refM3=refM2.transpose()*d2);
    VERIFY_IS_APPROX(m3=d2*m2, refM3=d2*refM2);
    VERIFY_IS_APPROX(m3=d1*m2.transpose(), refM3=d1*refM2.transpose());
    
    // evaluate to a dense matrix to check the .row() and .col() iterator functions
    VERIFY_IS_APPROX(d3=m2*d1, refM3=refM2*d1);
    VERIFY_IS_APPROX(d3=m2.transpose()*d2, refM3=refM2.transpose()*d2);
    VERIFY_IS_APPROX(d3=d2*m2, refM3=d2*refM2);
    VERIFY_IS_APPROX(d3=d1*m2.transpose(), refM3=d1*refM2.transpose());
  }

  // test self adjoint products
  {
    DenseMatrix b = DenseMatrix::Random(rows, rows);
    DenseMatrix x = DenseMatrix::Random(rows, rows);
    DenseMatrix refX = DenseMatrix::Random(rows, rows);
    DenseMatrix refUp = DenseMatrix::Zero(rows, rows);
    DenseMatrix refLo = DenseMatrix::Zero(rows, rows);
    DenseMatrix refS = DenseMatrix::Zero(rows, rows);
    SparseMatrixType mUp(rows, rows);
    SparseMatrixType mLo(rows, rows);
    SparseMatrixType mS(rows, rows);
    do {
      initSparse<Scalar>(density, refUp, mUp, ForceRealDiag|/*ForceNonZeroDiag|*/MakeUpperTriangular);
    } while (refUp.isZero());
    refLo = refUp.adjoint();
    mLo = mUp.adjoint();
    refS = refUp + refLo;
    refS.diagonal() *= 0.5;
    mS = mUp + mLo;
    // TODO be able to address the diagonal....
    for (int k=0; k<mS.outerSize(); ++k)
      for (typename SparseMatrixType::InnerIterator it(mS,k); it; ++it)
        if (it.index() == k)
          it.valueRef() *= 0.5;

    VERIFY_IS_APPROX(refS.adjoint(), refS);
    VERIFY_IS_APPROX(mS.adjoint(), mS);
    VERIFY_IS_APPROX(mS, refS);
    VERIFY_IS_APPROX(x=mS*b, refX=refS*b);

    VERIFY_IS_APPROX(x=mUp.template selfadjointView<Upper>()*b, refX=refS*b);
    VERIFY_IS_APPROX(x=mLo.template selfadjointView<Lower>()*b, refX=refS*b);
    VERIFY_IS_APPROX(x=mS.template selfadjointView<Upper|Lower>()*b, refX=refS*b);
  }
}