Example #1
0
void evaluateSPQRSolver(const Eigen::MatrixXd& A, const Eigen::VectorXd& b,
    const Eigen::VectorXd& x) {
  cholmod_common cholmod;
  cholmod_l_start(&cholmod);
  cholmod_sparse* A_CS = aslam::calibration::eigenDenseToCholmodSparseCopy(A,
    &cholmod);
  cholmod_dense b_CD;
  aslam::calibration::eigenDenseToCholmodDenseView(b, &b_CD);
  Eigen::VectorXd x_est;
//  const double before = aslam::calibration::Timestamp::now();
  SuiteSparseQR_factorization<double>* factor = SuiteSparseQR_factorize<double>(
    SPQR_ORDERING_BEST, SPQR_DEFAULT_TOL, A_CS, &cholmod);
  cholmod_dense* Qtb = SuiteSparseQR_qmult<double>(SPQR_QTX, factor, &b_CD,
    &cholmod);
  cholmod_dense* x_est_cd = SuiteSparseQR_solve<double>(SPQR_RETX_EQUALS_B,
    factor, Qtb, &cholmod);
  cholmod_l_free_dense(&Qtb, &cholmod);
  aslam::calibration::cholmodDenseToEigenDenseCopy(x_est_cd, x_est);
  cholmod_l_free_dense(&x_est_cd, &cholmod);
//  std::cout << "estimated rank: " << factor->rank << std::endl;
//  std::cout << "estimated rank deficiency: " << A.cols() - factor->rank
//    << std::endl;
  SuiteSparseQR_free(&factor, &cholmod);
//  const double after = aslam::calibration::Timestamp::now();
//  const double error = (b - A * x_est).norm();
//  std::cout << std::fixed << std::setprecision(18) << "error: " << error
//    << " est_diff: " << (x - x_est).norm() << " time: " << after - before
//    << std::endl;
  cholmod_l_free_sparse(&A_CS, &cholmod);
  cholmod_l_finish(&cholmod);
}
Example #2
0
int main (int argc, char **argv)
{
	cholmod_common Common, *cc ;
	cholmod_sparse *A ;
	cholmod_dense *X, *B, *Residual ;
	double rnorm, one [2] = {1,0}, minusone [2] = {-1,0} ;
	int mtype ;
	// start CHOLMOD
	cc = &Common ;
	cholmod_l_start (cc) ;
	// load A
	A = (cholmod_sparse *) cholmod_l_read_matrix (stdin, 1, &mtype, cc) ;
	// B = ones (size (A,1),1)
	B = cholmod_l_ones (A->nrow, 1, A->xtype, cc) ;
	// X = A\B
	X = SuiteSparseQR <double> (A, B, cc) ;
	// rnorm = norm (B-A*X)
	Residual = cholmod_l_copy_dense (B, cc) ;
	cholmod_l_sdmult (A, 0, minusone, one, X, Residual, cc) ;
	rnorm = cholmod_l_norm_dense (Residual, 2, cc) ;
	printf ("2-norm of residual: %8.1e\n", rnorm) ;
	printf ("rank %ld\n", cc->SPQR_istat [4]) ;
	// free everything and finish CHOLMOD
	cholmod_l_free_dense (&Residual, cc) ;
	cholmod_l_free_sparse (&A, cc) ;
	cholmod_l_free_dense (&X, cc) ;
	cholmod_l_free_dense (&B, cc) ;
	cholmod_l_finish (cc) ;
	return (0) ;
Example #3
0
   const DenseMatrix<Real>& DenseMatrix<Real> :: operator=( cholmod_dense* B )
   // copies a cholmod_dense* into a DenseMatrix;
   // takes responsibility for deallocating B
   {
      assert( B );
      assert( B->xtype == CHOLMOD_REAL );

      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
      }
      cData = B;
      
      m = cData->nrow;
      n = cData->ncol;
      data.resize( m*n );

      double* x = (double*) cData->x;
      for( int i = 0; i < m*n; i++ )
      {
         data[i] = x[i];
      }

      return *this;
   }
Example #4
0
   const DenseMatrix<Quaternion>& DenseMatrix<Quaternion> :: operator=( cholmod_dense* B )
   // copies a cholmod_dense* into a DenseMatrix;
   // takes responsibility for deallocating B
   {
      assert( B );
      assert( B->xtype == CHOLMOD_REAL );
      assert( B->ncol == 1 );
      assert( B->nrow%4 == 0 );

      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
      }
      cData = B;

      m = cData->nrow/4;
      n = 1;
      data.resize( m*n );

      double* x = (double*) cData->x;
      for( int i = 0; i < m; i++ )
      {
         data[i] = Quaternion( x[i*4+0],
                               x[i*4+1],
                               x[i*4+2],
                               x[i*4+3] );
      }

      return *this;
   }
Example #5
0
void check_residual
(
    cholmod_sparse *A,
    cholmod_dense *X,
    cholmod_dense *B,
    cholmod_common *cc
)
{
    Long m = A->nrow ;
    Long n = A->ncol ;
    Long rnk ;
    double rnorm, anorm, xnorm, atrnorm ;
    double one [2] = {1,0}, minusone [2] = {-1,0}, zero [2] = {0,0} ;
    cholmod_dense *r, *atr ;

    // get the rank(A) estimate
    rnk = cc->SPQR_istat [4] ;

    // anorm = norm (A,1) ;
    anorm = cholmod_l_norm_sparse (A, 1, cc) ;

    // rnorm = norm (A*X-B)
    r = cholmod_l_copy_dense (B, cc) ;
    cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ;
    rnorm = cholmod_l_norm_dense (r, 2, cc) ;

    // xnorm = norm (X)
    xnorm = cholmod_l_norm_dense (X, 2, cc) ;

    // atrnorm = norm (A'*r)
    atr = cholmod_l_zeros (n, 1, r->xtype, cc) ;        // atr = zeros (n,1)
    cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ;    // atr = A'*r
    atrnorm = cholmod_l_norm_dense (atr, 2, cc) ;       // atrnorm = norm (atr)
    if (anorm > 0) atrnorm /= anorm ;

    if (m <= n && anorm > 0 && xnorm > 0)
    {
        // find the relative residual, except for least-squares systems
        rnorm /= (anorm * xnorm) ;
    }
    printf ("relative norm(Ax-b): %8.1e rank: %6ld  "
        "rel. norm(A'(Ax-b)) %8.1e\n", rnorm, rnk, atrnorm) ;
    cholmod_l_free_dense (&r, cc) ;
    cholmod_l_free_dense (&atr, cc) ;
}
Example #6
0
   const Dense& Dense :: operator=( cholmod_dense* A )
   {
      if( data ) { cholmod_l_free_dense( &data, common ); data = NULL; }

      data = A;

      initializeFromCopy();

      return *this;
   }
   const DenseMatrix& DenseMatrix :: operator=( const DenseMatrix& A )
   // copies A
   {
      if( data ) { cholmod_l_free_dense( &data, context ); data = NULL; }

      data = cholmod_l_copy_dense( A.data, context );

      initializeFromCopy();

      return *this;
   }
   const DenseMatrix& DenseMatrix :: operator=( cholmod_dense* A )
   // gets pointer to A; will deallocate A upon destruction
   {
      if( data ) { cholmod_l_free_dense( &data, context ); data = NULL; }

      data = A;

      initializeFromCopy();

      return *this;
   }
Example #9
0
   cholmod_dense* DenseMatrix<Real> :: to_cholmod( void )
   // returns pointer to underlying cholmod_dense data structure
   {
      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
         cData = NULL;
      }

      int d = m; // leading dimension
      cData = cholmod_l_allocate_dense( m, n, d, CHOLMOD_REAL, context );
      double* x = (double*) cData->x;

      for( int i = 0; i < m*n; i++ )
      {
         x[i] = data[i];
      }

      return cData;
   }
Example #10
0
   void Dense :: vertcat( const Dense& A, const Dense& B )
   {
      assert( A.n == B.n );
      assert( A.xtype == B.xtype );

      m = A.m + B.m;
      n = A.n;
      xtype = A.xtype;

      if( data ) { cholmod_l_free_dense( &data, common ); data = NULL; }
      data = cholmod_l_allocate_dense( m, n, m, xtype, common );
      rData = (double*) data->x;
      if( xtype == CHOLMOD_ZOMPLEX )
      {
         iData = (double*) data->z;
      }

      for( int i = 0; i < A.m; i++ )
      for( int j = 0; j < A.n; j++ )
      {
         rData[i+m*j] = A.rData[i+A.m*j];
         if( xtype == CHOLMOD_ZOMPLEX )
         {
            iData[i+m*j] = A.iData[i+A.m*j];
         }
      }

      for( int i = 0; i < B.m; i++ )
      for( int j = 0; j < B.n; j++ )
      {
         rData[(i+A.m)+m*j] = B.rData[i+B.m*j];
         if( xtype == CHOLMOD_ZOMPLEX )
         {
            iData[(i+A.m)+m*j] = B.iData[i+B.m*j];
         }
      }
   }
Example #11
0
mxArray *spqr_mx_put_dense
(
    cholmod_dense **Ahandle,	// CHOLMOD version of the matrix
    cholmod_common *cc
)
{
    mxArray *Amatlab ;
    cholmod_dense *A ;

    A = *Ahandle ;

    if (A->d != A->nrow)
    {
        mexErrMsgIdAndTxt ("QR:internalError", "internal error!") ;
    }

    Amatlab = spqr_mx_put_dense2 (A->nrow, A->ncol, (double *) A->x,
        A->xtype != CHOLMOD_REAL, cc) ;

    A->x = NULL ;
    A->z = NULL ;
    cholmod_l_free_dense (Ahandle, cc) ;
    return (Amatlab) ;
}
Example #12
0
   cholmod_dense* DenseMatrix<Quaternion> :: to_cholmod( void )
   // returns pointer to underlying cholmod_dense data structure
   {
      assert( nColumns() == 1 );

      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
         cData = NULL;
      }

      cData = cholmod_l_allocate_dense( m*4, 1, m*4, CHOLMOD_REAL, context );
      double* x = (double*) cData->x;

      for( int i = 0; i < m*n; i++ )
      {
         for( int k = 0; k < 4; k++ )
         {
            x[i*4+k] = data[i][k];
         }
      }

      return cData;
   }
Example #13
0
int main (int argc, char **argv)
{
    cholmod_sparse *A, *R ;
    cholmod_dense *B, *C ;
    SuiteSparse_long *E ;
    int mtype ;
    long m, n, rnk ;
    size_t total_mem, available_mem ;
    double t ;

    // start CHOLMOD
    cholmod_common *cc, Common ;
    cc = &Common ;
    cholmod_l_start (cc) ;

    // warmup the GPU.  This can take some time, but only needs
    // to be done once
    cc->useGPU = false ;
    t = SuiteSparse_time ( ) ;
    cholmod_l_gpu_memorysize (&total_mem, &available_mem, cc) ;
    cc->gpuMemorySize = available_mem ;
    t = SuiteSparse_time ( ) - t ;
    if (cc->gpuMemorySize <= 1)
    {
        printf ("no GPU available\n") ;
    }
    printf ("available GPU memory: %g MB, warmup time: %g\n",
        (double) (cc->gpuMemorySize) / (1024 * 1024), t) ;

    // A = mread (stdin) ; read in the sparse matrix A
    const char *filename = argv[1];
    FILE *file = fopen(filename, "r");
    A = (cholmod_sparse *) cholmod_l_read_matrix (file, 1, &mtype, cc) ;
    fclose(file);
    if (mtype != CHOLMOD_SPARSE)
    {
        printf ("input matrix must be sparse\n") ;
        exit (1) ;
    }

    // [m n] = size (A) ;
    m = A->nrow ;
    n = A->ncol ;

    long ordering = (argc < 3 ? SPQR_ORDERING_DEFAULT : atoi(argv[2]));

    printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n",
        m, n, cholmod_l_nnz (A, cc)) ;

    // B = ones (m,1), a dense right-hand-side of the same type as A
    B = cholmod_l_ones (m, 1, A->xtype, cc) ;

    double tol = SPQR_NO_TOL ;
    long econ = 0 ;

    // [Q,R,E] = qr (A), but discard Q
    // SuiteSparseQR <double> (ordering, tol, econ, A, &R, &E, cc) ;

    // [C,R,E] = qr (A,b), but discard Q
    SuiteSparseQR <double> (ordering, tol, econ, A, B, &C, &R, &E, cc) ;

    // now R'*R-A(:,E)'*A(:,E) should be epsilon
    // and C = Q'*b.  The solution to the least-squares problem
    // should be x=R\C.

    // write out R to a file
    FILE *f = fopen ("R.mtx", "w") ;
    cholmod_l_write_sparse (f, R, NULL, NULL, cc) ;
    fclose (f) ;

    // write out C to a file
    f = fopen ("C.mtx", "w") ;
    cholmod_l_write_dense (f, C, NULL, cc) ;
    fclose (f) ;

    // write out E to a file
    f = fopen ("E.txt", "w") ;
    for (long i = 0 ; i < n ; i++)
    {
        fprintf (f, "%ld\n", 1 + E [i]) ;
    }
    fclose (f) ;

    // free everything
    cholmod_l_free_sparse (&A, cc) ;
    cholmod_l_free_sparse (&R, cc) ;
    cholmod_l_free_dense  (&C, cc) ;
    // cholmod_l_free (&E, cc) ;
    cholmod_l_finish (cc) ;

    return (0) ;
}
Example #14
0
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    double dummy = 0, *Px, *Xsetx ;
    Long *Lp, *Lnz, *Xp, *Xi, xnz, *Perm, *Lprev, *Lnext, *Xsetp ;
    cholmod_sparse *Bset, Bmatrix, *Xset ;
    cholmod_dense *Bdense, *X, *Y, *E ;
    cholmod_factor *L ;
    cholmod_common Common, *cm ;
    Long k, j, n, head, tail, xsetlen ;
    int sys, kind ;

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    if (nargin != 5 || nargout > 2)
    {
        mexErrMsgTxt ("usage: [x xset] = lsubsolve (L,kind,P,b,system)") ;
    }

    n = mxGetN (pargin [0]) ;
    if (!mxIsSparse (pargin [0]) || n != mxGetM (pargin [0]))
    {
        mexErrMsgTxt ("lsubsolve: L must be sparse and square") ;
    }
    if (mxGetNumberOfElements (pargin [1]) != 1)
    {
        mexErrMsgTxt ("lsubsolve: kind must be a scalar") ;
    }

    if (mxIsSparse (pargin [2]) ||
            !(mxIsEmpty (pargin [2]) || mxGetNumberOfElements (pargin [2]) == n))
    {
        mexErrMsgTxt ("lsubsolve: P must be size n, or empty") ;
    }

    if (mxGetM (pargin [3]) != n || mxGetN (pargin [3]) != 1)
    {
        mexErrMsgTxt ("lsubsolve: b wrong dimension") ;
    }
    if (!mxIsSparse (pargin [3]))
    {
        mexErrMsgTxt ("lxbpattern: b must be sparse") ;
    }
    if (mxGetNumberOfElements (pargin [4]) != 1)
    {
        mexErrMsgTxt ("lsubsolve: system must be a scalar") ;
    }

    /* ---------------------------------------------------------------------- */
    /* get the inputs */
    /* ---------------------------------------------------------------------- */

    kind = (int) sputil_get_integer (pargin [1], FALSE, 0) ;
    sys  = (int) sputil_get_integer (pargin [4], FALSE, 0) ;

    /* ---------------------------------------------------------------------- */
    /* get the sparse b */
    /* ---------------------------------------------------------------------- */

    /* get sparse matrix B (unsymmetric) */
    Bset = sputil_get_sparse (pargin [3], &Bmatrix, &dummy, 0) ;
    Bdense = cholmod_l_sparse_to_dense (Bset, cm) ;
    Bset->x = NULL ;
    Bset->z = NULL ;
    Bset->xtype = CHOLMOD_PATTERN ;

    /* ---------------------------------------------------------------------- */
    /* construct a shallow copy of the input sparse matrix L */
    /* ---------------------------------------------------------------------- */

    /* the construction of the CHOLMOD takes O(n) time and memory */

    /* allocate the CHOLMOD symbolic L */
    L = cholmod_l_allocate_factor (n, cm) ;
    L->ordering = CHOLMOD_NATURAL ;

    /* get the MATLAB L */
    L->p = mxGetJc (pargin [0]) ;
    L->i = mxGetIr (pargin [0]) ;
    L->x = mxGetPr (pargin [0]) ;
    L->z = mxGetPi (pargin [0]) ;

    /* allocate and initialize the rest of L */
    L->nz = cholmod_l_malloc (n, sizeof (Long), cm) ;
    Lp = L->p ;
    Lnz = L->nz ;
    for (j = 0 ; j < n ; j++)
    {
        Lnz [j] = Lp [j+1] - Lp [j] ;
    }

    /* these pointers are not accessed in cholmod_solve2 */
    L->prev = cholmod_l_malloc (n+2, sizeof (Long), cm) ;
    L->next = cholmod_l_malloc (n+2, sizeof (Long), cm) ;
    Lprev = L->prev ;
    Lnext = L->next ;

    head = n+1 ;
    tail = n ;
    Lnext [head] = 0 ;
    Lprev [head] = -1 ;
    Lnext [tail] = -1 ;
    Lprev [tail] = n-1 ;
    for (j = 0 ; j < n ; j++)
    {
        Lnext [j] = j+1 ;
        Lprev [j] = j-1 ;
    }
    Lprev [0] = head ;

    L->xtype = (mxIsComplex (pargin [0])) ? CHOLMOD_ZOMPLEX : CHOLMOD_REAL ;
    L->nzmax = Lp [n] ;

    /* get the permutation */
    if (mxIsEmpty (pargin [2]))
    {
        L->Perm = NULL ;
        Perm = NULL ;
    }
    else
    {
        L->ordering = CHOLMOD_GIVEN ;
        L->Perm = cholmod_l_malloc (n, sizeof (Long), cm) ;
        Perm = L->Perm ;
        Px = mxGetPr (pargin [2]) ;
        for (k = 0 ; k < n ; k++)
        {
            Perm [k] = ((Long) Px [k]) - 1 ;
        }
    }

    /* set the kind, LL' or LDL' */
    L->is_ll = (kind == 0) ;
    /*
    cholmod_l_print_factor (L, "L", cm) ;
    */

    /* ---------------------------------------------------------------------- */
    /* solve the system */
    /* ---------------------------------------------------------------------- */

    X = cholmod_l_zeros (n, 1, L->xtype, cm) ;
    Xset = NULL ;
    Y = NULL ;
    E = NULL ;

    cholmod_l_solve2 (sys, L, Bdense, Bset, &X, &Xset, &Y, &E, cm) ;

    cholmod_l_free_dense (&Y, cm) ;
    cholmod_l_free_dense (&E, cm) ;

    /* ---------------------------------------------------------------------- */
    /* return result */
    /* ---------------------------------------------------------------------- */

    pargout [0] = sputil_put_dense (&X, cm) ;

    /* fill numerical values of Xset with one's */
    Xsetp = Xset->p ;
    xsetlen = Xsetp [1] ;
    Xset->x = cholmod_l_malloc (xsetlen, sizeof (double), cm) ;
    Xsetx = Xset->x ;
    for (k = 0 ; k < xsetlen ; k++)
    {
        Xsetx [k] = 1 ;
    }
    Xset->xtype = CHOLMOD_REAL ;

    pargout [1] = sputil_put_sparse (&Xset, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free workspace and the CHOLMOD L, except for what is copied to MATLAB */
    /* ---------------------------------------------------------------------- */

    L->p = NULL ;
    L->i = NULL ;
    L->x = NULL ;
    L->z = NULL ;
    cholmod_l_free_factor (&L, cm) ;
    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
}
int main(int argc, char* argv[])
{
	const int bufsize = 512;
    	char buffer[bufsize];
	int m,n,S;
	double time_st,time_end,time_avg;
	//omp_set_num_threads(2);
//	printf("\n-----------------\nnumber of threads fired = %d\n-----------------\n",(int)omp_get_num_threads());
	if(argc!=2)
	{
		cout<<"Insufficient arguments"<<endl;
		return 1;
	}
	
	graph G;

	cerr<<"Start reading                    ";
//	time_st=dsecnd();
	G.create_graph(argv[1]);
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cout<<"Success              "<<endl;
//	cerr<<"Reading time                     "<<time_avg<<endl;

	cerr<<"Constructing Matrices            ";
//	time_st=dsecnd();
	G.construct_MNA();
	G.construct_NA();
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cerr<<"Done                 "<<time_avg<<endl;

//	G.construct_sparse_MNA();
	m=G.node_array.size()-1;
	n=G.voltage_edge_id.size();
	
	cout<<endl;
	cout<<"MATRIX STAT:"<<endl;
	cout<<"Nonzero elements:               "<<G.nonzero<<endl;
	cout<<"Number of Rows:                 "<<m+n<<endl;
	cout<<"Nonzero in G:			"<<G.Gnonzero<<endl;
	cout<<"Number of rows in G:		"<<m<<endl;
	cout<<"Nonzero in P: 			"<<G.Pnonzero<<endl;
	cout<<"Number of rows in P:		"<<m<<endl;


//	printf("\n Nonzero = %d", G.nonzero);
//	printf("\n Rows = %d", m+n);

	cout<<"MAT val:		       "<<endl;
	int i,j;

	G.Mat_val[0] += 100;
	G.Gmat[0] +=100;
/*
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gmat[i];
	cout<<endl;
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.GrowIndex[i];
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
*/	SuiteSparse_long *Gnz = (SuiteSparse_long*)calloc(m,sizeof(SuiteSparse_long));
	for(i=0;i<m;i++)
	{
	//	cout<<endl;
		SuiteSparse_long startindex=G.GrowIndex[i];
		SuiteSparse_long endindex=G.GrowIndex[i+1];
		Gnz[i] = endindex - startindex;

//		for(j=startindex;j<endindex;j++)
//			cout<<" "<<G.Gmat[j];
//		cout<<endl;
		
	}


/*	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pmat[i];
	cout<<endl;
	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.ProwIndex[i];
	cout<<endl;
/*	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
	for(i=0;i<m;i++)
	{
		cout<<endl;
		int startindex=G.ProwIndex[i];
		int endindex=G.ProwIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Pmat[j];
		cout<<endl;
		
	}
	
/*	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.Mat_val[i];
	cout<<endl;
	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.columns[i];
	cout<<endl;	
	for(i=0;i<m+n+1;i++)
		cout<<" "<<G.rowIndex[i];
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.b[i]);
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.x[i]);
	cout<<endl;	
	
	
	for(i=0;i<m+n;i++)
	{
		cout<<endl;
		int startindex=G.rowIndex[i];
		int endindex=G.rowIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Mat_val[j];
		cout<<endl;
		
	}
*/
/*	for (i=0;i<m+n+1;i++)
	{
		//cout<<endl;
		if(G.rowIndex[i]==G.rowIndex[i+1])
			break;
		
		for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			if(G.Mat_val[j]>10)
				cout<<G.Mat_val[j]<<"\t";
		}
		//cout<<endl;
		/*for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			cout<<G.columns[j]<<"\t";
		}
		//cout<<endl;
	}
	cout<<endl;
*/

//printing the matrix
	printf("\n Fine till here");
	printf("\n");
//	int* rowmIndex=(int*)calloc(m+1,sizeof(int));
	printf("\n Fine till here");
	printf("\n");
	//int rowmIndex[5]={1,2,3,4,5};
/*	for(i=0;i<m+1;i++)
	{
		rowmIndex[i]=G.rowIndex[i];
		printf(" %d", rowmIndex[i]);
	}
*/
	printf("\n Allocating GPU memory\n");
	cudaDeviceReset();
	size_t free, total;
	cudaMemGetInfo(&free, &total);
	printf("\n Free Mem = %lf MB, Total mem = %lf MB\n", (double)(free)/(1024*1024), (double)(total)/(1024*1024));


	double *dev_csrValA, *dev_b, *dev_x;
	int *dev_csrRowIdxA, *dev_csrColA;

	double *dev_GcsrVal, *dev_b1, *dev_x1;
	double *dev_PcsrVal, *dev_b2, *dev_x2;
	
	int *dev_GcsrRowIdx, *dev_PcsrRowIdx, *dev_GcsrCol, *dev_PcsrCol;
	

	
	cudaMalloc((void**)&dev_PcsrVal, G.Pnonzero*sizeof(double));
	cudaMalloc((void**)&dev_PcsrRowIdx, (m+1)*sizeof(int));
	cudaMalloc((void**)&dev_PcsrCol, G.Pnonzero*sizeof(int));


	cudaMalloc((void**)&dev_b1, (m)*sizeof(double));
	cudaMalloc((void**)&dev_b2, n*sizeof(double));
	cudaMalloc((void**)&dev_x1, m*sizeof(double));
	cudaMalloc((void**)&dev_x2, n*sizeof(double));

	cudaMemcpy(dev_b1, G.b1, (m)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x1, G.x1, (m)*sizeof(double), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_PcsrVal, G.Pmat, G.Pnonzero*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_b2, G.b2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x2, G.x2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrRowIdx, G.ProwIndex, (m+1)*sizeof(int), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrCol, G.Pcolumns, (G.Pnonzero)*sizeof(int), cudaMemcpyHostToDevice);


	/* Matrix has been created and stored in CSR format.
	However, CHOLMOD requires CSC format. Since our matrix is symmetric positive definite, we can simply swap 
	csrColA with csrRowIdx and vice versa
	*/

	/* Starting the CHOLMOD routine now*/
	printf("\n Initiating CHOLMOD\n");
	cholmod_sparse *A, *P;
	cholmod_dense *x, *b, *r, *midvec;
	cholmod_factor *L;
	cholmod_common *Common, cm;
	Common = &cm;
	cholmod_l_start(Common);
//	&Common->useGPU=1;
	printf("\n m = %d, G.Gnonzero = %d\n", m, G.Gnonzero);	


	
	cholmod_sparse *C = cholmod_l_allocate_sparse((size_t)(m), (size_t)(m), (size_t)(G.Gnonzero), 1, 0, 1, 1, Common); 
//	P = cholmod_l_allocate_sparse((size_t)(m), (size_t)(n), (size_t)(G.Pnonzero), 1, 0, 0, 1, Common); 
//	printf("\n Allocated \n");

	C->itype = CHOLMOD_LONG;	
//	printf("\n Itype \n");
	C->p = &G.GrowIndex[0];
//	printf("\n Columns \n");
	C->nz = &Gnz[0];
//	printf("\n Rows \n");
	C->i = &G.Gcolumns[0];
	C->dtype = 0;
	C->x = &G.Gmat[0];
	 
/*	P->itype = CHOLMOD_LONG;
	P->p = &G.ProwIndex[0];
	P->nz = &Pnz[0];
	P->i = &G.Pcolumns[0];
	P->dtype = 0;
	P->x = &G.Pmat[0]; 
*/	 

	b = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
	b->dtype=0;
	b->x = &G.b1[0];
	b->xtype = 1;

	printf("\n CHOLMOD manually set\n");
	cholmod_l_print_sparse(C, "A", Common);
	cholmod_l_print_dense(b, "b", Common);


	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);

	cudaEventRecord(start, 0);

	L = cholmod_l_analyze(C, Common);
	printf("\n Analysis: Flops: %g \t lnz: %g\n", Common->fl, Common->lnz);
	cholmod_l_factorize(C, L, Common);
	x = cholmod_l_solve(CHOLMOD_A, L, b, Common);
	
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	
	

	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	printf("\n Time : %.6f secs :\n", elapsedTime);
	
	cholmod_l_print_dense(x, "X", Common);
	
	double *x1_mod = (double*)x->x;
	
	cudaMemcpy(dev_x1, x1_mod, m*sizeof(double), cudaMemcpyHostToDevice);
	
	cusparseStatus_t cuSparseStatus;
	cusparseHandle_t cuspHandle;
	cuSparseStatus = cusparseCreate(&cuspHandle);

	cusparseMatDescr_t descrP;
	cusparseCreateMatDescr(&descrP);

	cusparseSetMatType(descrP, CUSPARSE_MATRIX_TYPE_GENERAL);	
	cusparseSetMatIndexBase(descrP, CUSPARSE_INDEX_BASE_ZERO);
	
	
	double *dev_res1, *dev_simple;
	double *res1 = (double*)calloc(n,sizeof(double));
	cudaMalloc((void**)&dev_res1, n*sizeof(double));
	cudaMalloc((void**)&dev_simple, n*sizeof(double));
	
	const double alpha = 1.0, beta=0.0;
	//alpha = 1.0;
	//beta = 0.0;
	
	//solving P^T * G^-1 * b1 Result stored in dev_res1
	
	cuSparseStatus = cusparseDcsrmv(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_x1, &beta, dev_res1);
		
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
/*		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
		for(i=0;i<n;i++)
		{
			printf("\nres1[%d] = %.8f", i, res1[i]);
		}
		printf("\n P^T * G^-1 * b1 done! Vector stored in res1");
*/	}
	else
	{
		printf("\n P^T * G^-1 * b1 failed\n");
		exit(1);
	}
	
	const double alphaneg = -1.0;
		
		//Solving P^T * G^-1 * b1 - b2 ; Result stored in dev_res1
	
		
	cublasStatus_t cuBlasStatus;
	cublasHandle_t cubHandle;
	cuBlasStatus = cublasCreate(&cubHandle);
		
	cuBlasStatus = cublasDaxpy(cubHandle, n, &alphaneg, dev_b2, 1, dev_res1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
//		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
//		for(i=0;i<n;i++)
//		{
//			printf("\nres1[%d] = %.8f", i, res1[i]);
//		}
		printf("\n res1 = res1 - b2 done\n");
		 
	}
	else
	{
		printf("\n res1 = res1 - b2 failed\n");
	}
		
	
	
	
	///NOW COMPUTING G^-1 * P
	
	
	int k = 0;
	int breakloop=0;
	
	double **midMat = (double**)malloc(m*sizeof(double*));
	for(i=0;i<m;i++)
	{
		midMat[i] = (double*)calloc(n,sizeof(double));
	}
	
	cudaEventRecord(start, 0);
	
	for(i=0;i<n;i++)
	{
		breakloop = 0;
		double *vect = (double*)calloc(m,sizeof(double*));

		for(j=0;j<m;j++)
		{
			int startin = G.ProwIndex[j];
			int endin = G.ProwIndex[j+1];
			if(startin == endin)
				continue;
				
			k = startin;

			while(k<endin)
			{

				if(G.Pcolumns[k] == i)
				{	
					vect[j] = G.Pmat[k];
					breakloop=1;
					break;
				
				}
				k++;
			}
			if(breakloop == 1)
			{
				break;
			}
		}
		
		midvec = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
		midvec->dtype=0;
		midvec->x=&vect[0];
		midvec->xtype = 1;
		
		cholmod_dense *res2;
		
		res2 = cholmod_l_solve(CHOLMOD_A, L, midvec, Common);

		
		double *re = (double*)res2->x;
		
//		printf("\n vector %d is:\n", i);
		int i1, j1, k1;
//		for(j1=0;j1<m;j1++)
//		{
//			midmat2flat[i+j1*n] = re[j1];
//			printf(" %lf", re[j1]);
//		}
//		printf("\n");
		for(i1=0;i1<m;i1++)
		{
			midMat[i1][i] = re[i1];
		}
		
		cholmod_l_free_dense(&midvec, Common);
			
	}
	
/*	printf("\n Midmat = \n");
	for(i=0;i<m;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf", midMat[i][j]);
		}
		printf("\n");
	} 
*/
	double *midMatflat = (double*)calloc((m*n),sizeof(double));
	double *dev_midMat;
	double *dev_solut;
	int counter = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<m;j++)
		{
			midMatflat[counter] = midMat[j][i];
			counter++; 
		}
	}
	
	cudaMalloc((void**)&dev_midMat, m*n*sizeof(double));
	cudaMalloc((void**)&dev_solut, n*n*sizeof(double));
	
	cudaMemcpy(dev_midMat, midMatflat, m*n*sizeof(double), cudaMemcpyHostToDevice);
		
	//Solving P^T * midMat; Result stored in dev_solut
		
	cuSparseStatus = cusparseDcsrmm(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_midMat, m, &beta, dev_solut, n);
	
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
		printf("\n Solved P^T * G^-1 * P. Result stored in solut\n");
		
	}  
	else
	{
		printf("\n  Failed to Solve P^T * G^-1 * P \n");
		exit(1);
	}

/*	double *matGflat = (double*)calloc(n*n,sizeof(double));
	cudaMemcpy(matGflat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
	counter = 0;
	printf("\nBefore LU starts\n");
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf ", matGflat[counter]);
			counter++;
		}
		printf("\n");
	}
	printf("\n");
*/	
	cusolverStatus_t cuSolverStatus;
	
	
	cusolverDnHandle_t cudenHandle;
	cuSolverStatus = cusolverDnCreate(&cudenHandle);
	int Lwork = 0;
	cuSolverStatus = cusolverDnDgetrf_bufferSize(cudenHandle, n, n, dev_solut, n, &Lwork);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Buffer works\n Lwork = %d\n", Lwork);
	}
	else
	{
		exit(1);	
	}
	double *dev_Workspace;
	int *dev_Ipiv, *dev_Info;
	
	cudaMalloc((void**)&dev_Workspace, Lwork*sizeof(double));
	cudaMalloc((void**)&dev_Ipiv, n*sizeof(int));
	cudaMalloc((void**)&dev_Info, sizeof(int));
	
	//Calculating LU for dev_solut
//	double *nnmat = (double*)calloc(n*n,sizeof(double));
//	cudaMemcpy(nnmat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
//	cuSolverStatus = cusolverDnDgetrfHost(cudenHandle, n, n, 

	cuSolverStatus = cusolverDnDgetrf(cudenHandle, n, n, dev_solut, n, dev_Workspace, dev_Ipiv, dev_Info);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n solut has be defactorized into L and U. dev_Ipiv * solut = L * U\n");
	}
	else
	{
	
		printf("\n Unable to defactorize solut into LU\n");
		exit(1);	
	}
	
	//solving dev_solut * x = dev_res1. Result stored in dev_res1
	
	cuSolverStatus = cusolverDnDgetrs(cudenHandle, CUBLAS_OP_N, n, 1, dev_solut, n, dev_Ipiv, dev_res1, n, dev_Info);
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Solution obtained for x2 \n");
	}
	else
	{
		printf("\n LU decomposition obtained by LU solver failed\n");
	}

/*	cudaMemcpy(G.x2, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
	printf("\n x2 = \n");
	for(i=0;i<n;i++)
	{
		printf("\n x2[%d] = %lf", i, G.x2[i]); 
	}
*/	
	
	double *dev_dummy;
	cudaMalloc((void**)&dev_dummy, m*sizeof(double));
	cudaMemset(dev_dummy, 0.0, m*sizeof(double));
	
	printf("\n Starting solving for x1 \n");
	//Solving for x1
	
		//Solving G^-1 * P * x2;  G^-1 * P is stored in midMat
	cuBlasStatus = cublasDgemv(cubHandle, CUBLAS_OP_N, m, n, &alpha, dev_midMat, m, dev_res1, 1, &beta, dev_dummy, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		double *toprint = (double*)calloc(m,sizeof(double));
		cudaMemcpy(toprint, dev_dummy, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n Intermediate vector :\n");
		for(i=0;i<m;i++)
		{
			printf("\ndummy[%d] = %lf", i, toprint[i]);
		}
*/		printf("\n midmat * x2 obtained. Stored in dummy\n");
	}
	else
	{
		printf("\n Failed to obtain midmat * x2\n");
	}
	
	cuBlasStatus = cublasDaxpy(cubHandle, m, &alphaneg, dev_dummy, 1, dev_x1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		cudaMemcpy(G.x1, dev_x1, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n x1 = \n");
		for(i=0;i<m;i++)
		{
			printf("\n x1[%d] = %.15f", i, G.x1[i]);
		}
*/		printf("\n x1 obtained");
	}
	else
	{
		printf("\n Failed to obtain x1");
	}

	printf("\n Solver finished its work\n");

/*		cudaEventRecord(stop, 0);
		cudaEventSynchronize(stop);

		cudaEventElapsedTime(&elapsedTime, start, stop);
		printf("\n Time: %.6f msecs :\n", elapsedTime);
*/








	
	
	
	
	

	cholmod_l_finish(Common);
	return 0;

} 
Example #16
0
int main() {
  /* Time Recording Variables */
  int now_s, now_u;
  struct rusage usage;
  double time_now, time_past;
  
  /* x, y, z and global indices */
  int i, j, k, m, n, nodes;
  /* Connectivity enties */
  int m_above, m_below, m_left, m_right, m_front, m_back;
  double c_above, c_below, c_left, c_right, c_front, c_back, c_self;
  
  /* Creat cholmod object */
  cholmod_common Common, *cc;
  /* Compress column form sparse matrix */
  cholmod_sparse *S, *ST;
  /* forcing function on input; voltages after solving */
  cholmod_dense *b, *v0, *r;  
  size_t *Si, *Sp, *Snz, *bi, *bj;
  double *Sx, *bx, *v0x;
 
  /* store the sparse matrix */
  FILE *store;
  char filename[100];
  
  /* start CHOLMOD */
  cc = &Common;
  cholmod_l_start(cc);
  
  /* initialize timing variables*/ 
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_past = now_s + now_u  / 1.e6;
  time_now = time_past;
  
  /* total nodes in the grid */
  nodes = IMAX * JMAX * KMAX;
  
  /* Allocate space for connectivity matrix and forcing vector. */
  S = cholmod_l_allocate_sparse(nodes, nodes, 7 * nodes, 
				0, 0, 0, CHOLMOD_REAL, cc);

  b = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  bx = b->x;

  v0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  v0x = v0->x;



  /*================================================================*/
  /*===============  make connectivity matrix ======================*/
  /*================================================================*/
  Si = (size_t *) S->i;
  Sp = (size_t *) S->p;
  Sx = (double *) S->x;
  Snz = (size_t *) S->nz;
  n = 0;
  Sp[0] = 0;
  for (k = 0; k < KMAX; k++) {
    for (j = 0; j < JMAX; j++) {
      for (i = 0; i < IMAX; i++) {
	/* Global index in x-fastest order*/
	m = k * IMAX * JMAX + j * IMAX + i;
	Sp[m + 1] = Sp[m];
	v0x[m] = (k + 1.0) / (KMAX + 1.0);

	/* Six coef.s of every node */
	c_below = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j + 0.5, k - 0.5) +
		   mepr(i + 0.5, j + 0.5, k - 0.5)) / 4.0;

	c_front = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j - 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_left = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j - 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i - 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_right = (mepr(i + 0.5, j + 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_back = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i + 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i + 0.5, j + 0.5, k + 0.5)) / 4.0;

	c_above = (mepr(i - 0.5, j - 0.5, k + 0.5) + 
		   mepr(i + 0.5, j - 0.5, k + 0.5) + 
		   mepr(i - 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j + 0.5, k + 0.5))/4.0;

	/* Self term. */
	c_self = -(c_above + c_below + c_left + c_right + c_front + c_back);
	Si[n] = m; Sx[n] = c_self; n++; Sp[m + 1]++;

	/* Node below. Ensure not on bottom face */
	if (k != 0) {
	  m_below = m - IMAX * JMAX;
 	  Si[n] = m_below; Sx[n] = c_below; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_below * VBOT;
	}

	/* Node front.  Ensure not on front face. */
	if (j != 0) {
	  m_front = m - IMAX;
	} else {
	  m_front = m + (JMAX - 1) * IMAX;
	}
	Si[n] = m_front; Sx[n] = c_front; n++; Sp[m + 1]++;

	/* Node to left.  Ensure not on left face. */
	if (i != 0) {
	  m_left = m - 1;
	} else {
	  m_left = m + IMAX -1;
	}
	Si[n] = m_left; Sx[n] = c_left; n++; Sp[m + 1]++;	   

	/* Node to right.  Ensure not on right face. */
	if (i != IMAX - 1) {
	  m_right = m + 1;
	} else{
	  m_right = m - IMAX + 1;
	}
	Si[n] = m_right; Sx[n] = c_right; n++; Sp[m + 1]++;

	/* Node back.  Ensure not on back face. */
	if (j != JMAX - 1) {
	  m_back = m + IMAX;
	} else {
	  m_back = m - (JMAX - 1) * IMAX;
	}
	Si[n] = m_back; Sx[n] = c_back; n++; Sp[m + 1]++;

	/* Node top. Ensure not on top face */
	if (k != KMAX - 1) {
	  m_above = m + IMAX * JMAX;
 	  Si[n] = m_above; Sx[n] = c_above; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_above * VTOP;
	}

	Snz[m] = Sp[m + 1] - Sp[m];
      }
    }
  }
  /*====================================================================*/
  /*===================Done creating connectivity matrix.===============*/
  /*====================================================================*/

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished creating connectivity matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);
  time_past = time_now;

  /* Print all three matrixes */
  cholmod_l_print_sparse(S, "S", cc);
  cholmod_l_print_dense(b, "b", cc);
  cholmod_l_print_dense(v0, "v0", cc);

  /* Allocate residual vector */
  r = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);

  /* The Preconditioned Conjugate Gradient Method */
  /* Calculate the first residual value */
  double one[2], zero[2], minusone[2];
  zero[0] = 0.0;
  zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  minusone[0] = -1.0;
  minusone[1] = 0.0;

  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("Initial 2-norm = %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Initial 1-norm = %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Initial 0-norm = %f\n", cholmod_l_norm_dense(r, 0, cc));
  

  /* The iteration */
  double rho1, rho0, beta = 0.0, alpha = 0.0;
  cholmod_dense *p1, *p0, *q;
  double *p1x, *p0x, *qx, *rx;
  /*
  p1 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  p0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  q = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);  
  */
  p1 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p0 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  q = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p1x = (double *) p1->x;
  p0x = (double *) p0->x;
  qx = (double *) q->x;
  rx = (double *) r->x;

  int iter;
  for (iter = 0; iter < MAX_ITER; iter++) {
    if (cholmod_l_norm_dense(r, 0, cc) < 1e-10)
      break;
    
    rho1 = cholmod_l_norm_dense(r, 2, cc);
    rho1 = rho1 * rho1;
    
    if (iter == 0) {
      cholmod_l_copy_dense2(r, p1, cc);
    } else {
      beta = rho1 / rho0;
      for (i = 0; i < nodes; i++) {
	p1x[i] = rx[i] + beta * p0x[i];
      }
    }
   
    cholmod_l_sdmult(S, 0, one, zero, p1, q, cc);
    alpha = 0;
    for (i = 0; i < nodes; i++) {
      alpha += p1x[i] * qx[i];
    }
    alpha = rho1 / alpha;

    for (i = 0; i < nodes; i++) {
      v0x[i] += alpha * p1x[i];
      rx[i] -= alpha * qx[i];
    }
    /*
    printf("iter = %d\n: rho1 = %f, rho0 = %f, alpha = %f, beta = %f\n", 
	   iter, rho1, rho0, alpha, beta);
    */
    cholmod_l_copy_dense2(p1, p0, cc);
    rho0 = rho1;
  }
  
  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("After %d iterations:\n", iter);
  printf("Final 2-norm: %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Final 1-norm: %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Final 0-norm: %f\n", cholmod_l_norm_dense(r, 0, cc));
  /* sort sparse matrix and store it 
  cholmod_l_sort(S, cc);
  S->stype = 0;
  */

  /* Check if S is symmetric
  ST = cholmod_l_transpose(S, 2, cc);
  store = fopen("LpT.dat", "w");
  cholmod_l_write_sparse(store, ST, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&ST, cc);
  */
  
  /* store the sparse matrix   */
  /*
  store = fopen("Laplace.dat", "w");
  cholmod_l_write_sparse(store, S, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&S, cc);
  */

  /* store the force vector  */
  /*
  store = fopen("Force.dat", "w");
  cholmod_l_write_dense(store, b, NULL, cc);
  fclose(store);
  cholmod_l_free_dense(&b,cc);
  */

  /* store the guess vector   */
  store = fopen("Voltage.dat", "w");
  cholmod_l_write_dense(store, v0, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&v0, cc);

  /* store the residual vector  */
  store = fopen("Residual.dat", "w");
  cholmod_l_write_dense(store, r, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&r, cc);

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished writing matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);

  cholmod_l_finish(cc);
  return 0;
}
Example #17
0
int main (int argc, char **argv)
{
    cholmod_common Common, *cc ;
    cholmod_sparse *A ;
    cholmod_dense *X, *B ;
    int mtype ;
    Long m, n ;

    // start CHOLMOD
    cc = &Common ;
    cholmod_l_start (cc) ;

    // A = mread (stdin) ; read in the sparse matrix A
    A = (cholmod_sparse *) cholmod_l_read_matrix (stdin, 1, &mtype, cc) ;
    if (mtype != CHOLMOD_SPARSE)
    {
        printf ("input matrix must be sparse\n") ;
        exit (1) ;
    }

    // [m n] = size (A) ;
    m = A->nrow ;
    n = A->ncol ;

    printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n", m, n, cholmod_l_nnz (A, cc)) ;

    // B = ones (m,1), a dense right-hand-side of the same type as A
    B = cholmod_l_ones (m, 1, A->xtype, cc) ;

    // X = A\B ; with default ordering and default column 2-norm tolerance
    if (A->xtype == CHOLMOD_REAL)
    {
        // A, X, and B are all real
        X = SuiteSparseQR <double>
            (SPQR_ORDERING_DEFAULT, SPQR_DEFAULT_TOL, A, B, cc) ;
    }
    else
    {
        // A, X, and B are all complex
        X = SuiteSparseQR < std::complex<double> >
            (SPQR_ORDERING_DEFAULT, SPQR_DEFAULT_TOL, A, B, cc) ;
    }

    check_residual (A, X, B, cc) ;
    cholmod_l_free_dense (&X, cc) ;

    // -------------------------------------------------------------------------
    // factorizing once then solving twice with different right-hand-sides
    // -------------------------------------------------------------------------

    // Just the real case.  Complex case is essentially identical
    if (A->xtype == CHOLMOD_REAL)
    {
        SuiteSparseQR_factorization <double> *QR ;
        cholmod_dense *Y ;
        Long i ;
        double *Bx ;

        // factorize once
        QR = SuiteSparseQR_factorize <double>
            (SPQR_ORDERING_DEFAULT, SPQR_DEFAULT_TOL, A, cc) ;

        // solve Ax=b, using the same B as before

        // Y = Q'*B
        Y = SuiteSparseQR_qmult (SPQR_QTX, QR, B, cc) ;
        // X = R\(E*Y)
        X = SuiteSparseQR_solve (SPQR_RETX_EQUALS_B, QR, Y, cc) ;
        // check the results
        check_residual (A, X, B, cc) ;
        // free X and Y
        cholmod_l_free_dense (&Y, cc) ;
        cholmod_l_free_dense (&X, cc) ;

        // repeat with a different B
        Bx = (double *) (B->x) ;
        for (i = 0 ; i < m ; i++)
        {
            Bx [i] = i ;
        }

        // Y = Q'*B
        Y = SuiteSparseQR_qmult (SPQR_QTX, QR, B, cc) ;
        // X = R\(E*Y)
        X = SuiteSparseQR_solve (SPQR_RETX_EQUALS_B, QR, Y, cc) ;
        // check the results
        check_residual (A, X, B, cc) ;
        // free X and Y
        cholmod_l_free_dense (&Y, cc) ;
        cholmod_l_free_dense (&X, cc) ;

        // free QR
        SuiteSparseQR_free (&QR, cc) ;
    }

    // -------------------------------------------------------------------------
    // free everything that remains
    // -------------------------------------------------------------------------

    cholmod_l_free_sparse (&A, cc) ;
    cholmod_l_free_dense (&B, cc) ;
    cholmod_l_finish (cc) ;
    return (0) ;
}
 DenseMatrix :: ~DenseMatrix( void )
 // destructor
 {
    if( data ) cholmod_l_free_dense( &data, context );
 }
Example #19
0
 static int free_dense(cholmod_dense** A, cholmod_common* c) {
   return cholmod_l_free_dense(A, c);
 }
Example #20
0
 Dense :: ~Dense( void )
 {
    if( data ) cholmod_l_free_dense( &data, common );
 }
Example #21
0
int main (int argc, char **argv)
{
    cholmod_sparse *A ;
    cholmod_dense *X, *B, *r, *atr ;
    double anorm, xnorm, rnorm, one [2] = {1,0}, minusone [2] = {-1,0}, t ;
    double zero [2] = {0,0}, atrnorm ;
    int mtype ;
    long m, n, rnk ;
    size_t total_mem, available_mem ;

    // start CHOLMOD
    cholmod_common *cc, Common ;
    cc = &Common ;
    cholmod_l_start (cc) ;

    // warmup the GPU.  This can take some time, but only needs
    // to be done once
    cc->useGPU = true ;
    t = SuiteSparse_time ( ) ;
    cholmod_l_gpu_memorysize (&total_mem, &available_mem, cc) ;
    cc->gpuMemorySize = available_mem ;
    t = SuiteSparse_time ( ) - t ;
    if (cc->gpuMemorySize <= 1)
    {
        printf ("no GPU available\n") ;
    }
    printf ("available GPU memory: %g MB, warmup time: %g\n",
        (double) (cc->gpuMemorySize) / (1024 * 1024), t) ;

    // A = mread (stdin) ; read in the sparse matrix A
    const char *filename = (argc < 2 ? "Problems/2.mtx" : argv[1]);
    FILE *file = fopen(filename, "r");
    A = (cholmod_sparse *) cholmod_l_read_matrix (file, 1, &mtype, cc) ;
    fclose(file);
    if (mtype != CHOLMOD_SPARSE)
    {
        printf ("input matrix must be sparse\n") ;
        exit (1) ;
    }

    // [m n] = size (A) ;
    m = A->nrow ;
    n = A->ncol ;

    long ordering = (argc < 3 ? SPQR_ORDERING_DEFAULT : atoi(argv[2]));

#if 1
    printf ("Matrix %6ld-by-%-6ld nnz: %6ld\n",
        m, n, cholmod_l_nnz (A, cc)) ;
#endif

    // anorm = norm (A,1) ;
    anorm = cholmod_l_norm_sparse (A, 1, cc) ;

    // B = ones (m,1), a dense right-hand-side of the same type as A
    B = cholmod_l_ones (m, 1, A->xtype, cc) ;

    // X = A\B ; with default ordering and default column 2-norm tolerance
    if (A->xtype == CHOLMOD_REAL)
    {
        // A, X, and B are all real
        X = SuiteSparseQR <double>(ordering, SPQR_NO_TOL, A, B, cc) ;
    }
    else
    {
#if SUPPORTS_COMPLEX
        // A, X, and B are all complex
        X = SuiteSparseQR < std::complex<double> >
            (SPQR_ORDERING_DEFAULT, SPQR_NO_TOL, A, B, cc) ;
#else
        printf("Code doesn't support std::complex<?> types.\n");
#endif
    }

    // get the rank(A) estimate
    rnk = cc->SPQR_istat [4] ;

    // compute the residual r, and A'*r, and their norms
    r = cholmod_l_copy_dense (B, cc) ;                  // r = B
    cholmod_l_sdmult (A, 0, one, minusone, X, r, cc) ;  // r = A*X-r = A*x-b
    rnorm = cholmod_l_norm_dense (r, 2, cc) ;           // rnorm = norm (r)
    atr = cholmod_l_zeros (n, 1, CHOLMOD_REAL, cc) ;    // atr = zeros (n,1)
    cholmod_l_sdmult (A, 1, one, zero, r, atr, cc) ;    // atr = A'*r
    atrnorm = cholmod_l_norm_dense (atr, 2, cc) ;       // atrnorm = norm (atr)

    // xnorm = norm (X)
    xnorm = cholmod_l_norm_dense (X, 2, cc) ;

    // write out X to a file
    FILE *f = fopen ("X.mtx", "w") ;
    cholmod_l_write_dense (f, X, NULL, cc) ;
    fclose (f) ;

    if (m <= n && anorm > 0 && xnorm > 0)
    {
        // find the relative residual, except for least-squares systems
        rnorm /= (anorm * xnorm) ;
    }
    printf ("\nnorm(Ax-b): %8.1e\n", rnorm) ;
    printf ("norm(A'(Ax-b))         %8.1e rank: %ld of %ld\n", 
        atrnorm, rnk, (m < n) ? m:n) ;

    /* Write an info file. */
    FILE *info = fopen("gpu_results.txt", "w");
    fprintf(info, "%ld\n", cc->SPQR_istat[7]);        // ordering method
    fprintf(info, "%ld\n", cc->memory_usage);         // memory usage (bytes)
    fprintf(info, "%30.16e\n", cc->SPQR_flopcount);   // flop count
    fprintf(info, "%lf\n", cc->SPQR_analyze_time);    // analyze time
    fprintf(info, "%lf\n", cc->SPQR_factorize_time);  // factorize time
    fprintf(info, "-1\n") ;                           // cpu memory (bytes)
    fprintf(info, "-1\n") ;                           // gpu memory (bytes)
    fprintf(info, "%32.16e\n", rnorm);                // residual
    fprintf(info, "%ld\n", cholmod_l_nnz (A, cc));    // nnz(A)
    fprintf(info, "%ld\n", cc->SPQR_istat [0]);       // nnz(R)
    fprintf(info, "%ld\n", cc->SPQR_istat [2]);       // # of frontal matrices
    fprintf(info, "%ld\n", cc->SPQR_istat [3]);       // ntasks, for now
    fprintf(info, "%lf\n", cc->gpuKernelTime);        // kernel time (ms)
    fprintf(info, "%ld\n", cc->gpuFlops);             // "actual" gpu flops
    fprintf(info, "%d\n", cc->gpuNumKernelLaunches);  // # of kernel launches
    fprintf(info, "%32.16e\n", atrnorm) ;             // norm (A'*(Ax-b))

    fclose(info);

    // free everything
    cholmod_l_free_dense (&r, cc) ;
    cholmod_l_free_dense (&atr, cc) ;
    cholmod_l_free_sparse (&A, cc) ;
    cholmod_l_free_dense (&X, cc) ;
    cholmod_l_free_dense (&B, cc) ;
    cholmod_l_finish (cc) ;

    return (0) ;
}