Esempio n. 1
0
/**
 * Return a CHOLMOD copy of the cached Cholesky decomposition with the
 * required perm, LDL and super attributes.  If Imult is nonzero,
 * update the numeric values before returning.
 *
 * If no cached factor matches, a new one is analyzed and factorized;
 * it is stored in the factors of Ap only when Imult is zero, and the
 * freshly computed factor is returned.
 *
 * The global CHOLMOD common `c` is modified temporarily (supernodal,
 * final_ll and, for perm == 0, the ordering settings) and is restored
 * before returning and before an error is signalled.
 *
 * @param Ap     dsCMatrix object
 * @param perm   integer indicating if permutation is required (>0),
 *               forbidden (0) or optional (<0)
 * @param LDL    integer indicating if the LDL' form is required (>0),
 *               forbidden (0) or optional (<0)
 * @param super  integer indicating if the supernodal form is required (>0),
 *               forbidden (0) or optional (<0)
 * @param Imult  numeric multiplier of I in  |A + Imult * I|
 *
 * @return a factor intended to be released by the caller with
 *         cholmod_l_free_factor(); an R error is raised if the
 *         factorization fails.
 */
static CHM_FR
internal_chm_factor(SEXP Ap, int perm, int LDL, int super, double Imult)
{
    SEXP facs = GET_SLOT(Ap, Matrix_factorSym);
    SEXP nms = getAttrib(facs, R_NamesSymbol);
    int sup, ll, ok;
    CHM_FR L;
    CHM_SP A = AS_CHM_SP__(Ap);
    /* cholmod_l_factorize_p() takes beta as a two-element array
     * (real and imaginary part), so do not hand it the address of a
     * single double. */
    double beta[2];
    R_CheckStack();

    beta[0] = Imult;
    beta[1] = 0.;

    if (LENGTH(facs)) {
	for (int i = 0; i < LENGTH(nms); i++) { /* look for a match in cache */
	    if (chk_nm(CHAR(STRING_ELT(nms, i)), perm, LDL, super)) {
		L = AS_CHM_FR(VECTOR_ELT(facs, i));
		R_CheckStack();
		/* copy the factor so later it can safely be cholmod_l_free'd */
		L = cholmod_l_copy_factor(L, &c);
		if (L == NULL)
		    error(_("Cholesky factorization failed"));
		/* nonzero Imult: redo the numeric part on the copy, and
		 * report a failure just like the uncached path does */
		if (Imult &&
		    !cholmod_l_factorize_p(A, beta, (int*)NULL, 0 /*fsize*/,
					   L, &c)) {
		    cholmod_l_free_factor(&L, &c);
		    error(_("Cholesky factorization failed"));
		}
		return L;
	    }
	}
    }
				/* No cached factor - create one */
    sup = c.supernodal;		/* save current settings */
    ll = c.final_ll;

    c.final_ll = (LDL == 0) ? 1 : 0;
    c.supernodal = (super > 0) ? CHOLMOD_SUPERNODAL : CHOLMOD_SIMPLICIAL;

    if (perm) {			/* obtain fill-reducing permutation */
	L = cholmod_l_analyze(A, &c);
    } else {			/* require identity permutation */
	/* save current settings */
	int nmethods = c.nmethods, ord0 = c.method[0].ordering,
	    postorder = c.postorder;
	c.nmethods = 1; c.method[0].ordering = CHOLMOD_NATURAL; c.postorder = FALSE;
	L = cholmod_l_analyze(A, &c);
	/* and now restore */
	c.nmethods = nmethods; c.method[0].ordering = ord0; c.postorder = postorder;
    }
    ok = cholmod_l_factorize_p(A, beta, (int*)NULL, 0 /*fsize*/, L, &c);

    /* restore previous settings *before* a possible error(), which
     * does not return and would otherwise leave `c` modified */
    c.supernodal = sup;
    c.final_ll = ll;

    if (!ok) {
	if (L != NULL) cholmod_l_free_factor(&L, &c);
	error(_("Cholesky factorization failed"));
    }

    if (!Imult) {		/* cache the factor */
	/* name encodes the variant: [sS] super, [pP] perm, [dD] LDL */
	char fnm[12] = "sPDCholesky";
	if (super > 0) fnm[0] = 'S';
	if (perm == 0) fnm[1] = 'p';
	if (LDL == 0) fnm[2] = 'd';
	set_factors(Ap, chm_factor_to_SEXP(L, 0), fnm);
    }
    return L;
}
Esempio n. 2
0
   void Factor :: build( Upper& A )
   {
      // Release the factor left over from an earlier build, if there is one.
      if( L != NULL )
      {
         cholmod_l_free_factor( &L, common );
         L = NULL;
      }

      // Analysis step; a marker line is printed once it has returned.
      L = cholmod_l_analyze( *A, common );
      cout << "factor" << endl;

      // Numeric factorization step, followed by a second marker line.
      cholmod_l_factorize( *A, L, common );
      cout << "factor2" << endl;
   }
Esempio n. 3
0
File: chol2.c Progetto: GHilmarG/Ua
/*
 * MEX gateway, usage string: [R,p,q] = chol2 (A).
 *
 * Factorizes the sparse square input with CHOLMOD (only triu(A) is used),
 * converts the factor to a sparse matrix, and returns its conjugate
 * transpose as R.  Optional outputs: p (0 when the factorization ran through
 * all n columns, otherwise minor+1) and q (the permutation held in the
 * factor).  Without a q output the natural ordering is used.
 */
void mexFunction
(
    int	nargout,
    mxArray *pargout [ ],
    int	nargin,
    const mxArray *pargin [ ]
)
{
    cholmod_common cholmod_state ;
    cholmod_common *cc = &cholmod_state ;
    cholmod_sparse shell, *Aupper, *Lmat, *Rmat ;
    cholmod_factor *F ;
    double zero_entry = 0, *pout ;
    Long dim, first_bad ;

    /* ---- CHOLMOD start-up and parameters -------------------------------- */

    cholmod_l_start (cc) ;
    sputil_config (SPUMONI, cc) ;

    /* the finished factor is to be a packed, monotonic, simplicial LL' */
    cc->final_ll = TRUE ;
    cc->final_pack = TRUE ;
    cc->final_monotonic = TRUE ;
    cc->final_super = FALSE ;
    cc->final_asis = FALSE ;

    /* zeros are removed later by sputil_drop_zeros, so entries coming from
     * relaxed supernodal amalgamation need not be pruned here */
    cc->final_resymbol = FALSE ;

    /* stop early on a non-positive-definite matrix unless p was requested */
    cc->quick_return_if_not_posdef = (nargout < 2) ;

    /* ---- argument checks ------------------------------------------------ */

    if (!(nargin == 1 && nargout <= 3))
    {
	mexErrMsgTxt ("usage: [R,p,q] = chol2 (A)") ;
    }

    dim = mxGetN (pargin [0]) ;

    if (!(mxIsSparse (pargin [0]) && dim == mxGetM (pargin [0])))
    {
	mexErrMsgTxt ("A must be square and sparse") ;
    }

    /* wrap the MATLAB matrix; only triu(A) is used */
    Aupper = sputil_get_sparse (pargin [0], &shell, &zero_entry, 1) ;

    /* no q output requested: keep the natural ordering */
    if (nargout < 3)
    {
	cc->postorder = FALSE ;
	cc->nmethods = 1 ;
	cc->method [0].ordering = CHOLMOD_NATURAL ;
    }

    /* ---- analyze and factorize ------------------------------------------ */

    F = cholmod_l_analyze (Aupper, cc) ;
    cholmod_l_factorize (Aupper, F, cc) ;

    if (cc->status != CHOLMOD_OK && nargout < 2)
    {
	mexErrMsgTxt ("matrix is not positive definite") ;
    }

    /* ---- factor -> sparse matrix ---------------------------------------- */

    /* remember minor now: the conversion resets F->minor to n */
    first_bad = F->minor ;
    Lmat = cholmod_l_factor_to_sparse (F, cc) ;
    if (Lmat->xtype == CHOLMOD_COMPLEX)
    {
	/* switch from complex to zomplex */
	cholmod_l_sparse_xtype (CHOLMOD_ZOMPLEX, Lmat, cc) ;
    }

    /* cut off columns first_bad .. n-1 when the factorization stopped early */
    if (first_bad < dim)
    {
	sputil_trim (Lmat, first_bad, cc) ;
    }

    /* remove zero entries */
    sputil_drop_zeros (Lmat) ;

    /* Lmat is lower triangular; its conjugate transpose is R */
    Rmat = cholmod_l_transpose (Lmat, 2, cc) ;
    cholmod_l_free_sparse (&Lmat, cc) ;

    /* ---- outputs -------------------------------------------------------- */

    /* R */
    pargout [0] = sputil_put_sparse (&Rmat, cc) ;

    /* p: minor, translated to the MATLAB convention */
    if (nargout > 1)
    {
	pargout [1] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
	pout = mxGetPr (pargout [1]) ;
	if (first_bad == dim)
	{
	    pout [0] = 0 ;
	}
	else
	{
	    pout [0] = (first_bad+1) ;
	}
    }

    /* q: the permutation */
    if (nargout > 2)
    {
	pargout [2] = sputil_put_int (F->Perm, dim, 1) ;
    }

    /* ---- clean up: free the factor and shut CHOLMOD down ---------------- */

    cholmod_l_free_factor (&F, cc) ;
    cholmod_l_finish (cc) ;
    cholmod_l_print_common (" ", cc) ;
    /*
    if (cc->malloc_count != (3 + mxIsComplex (pargout[0]))) mexErrMsgTxt ("!") ;
    */
}
Esempio n. 4
0
/*
 * MEX gateway, usage string: [L,p,q] = ldlchol (A,beta).
 *
 * With one input the lower part of the square sparse A is factorized; with
 * two inputs all of A is used (A*A', per the stype comment below) and the
 * scalar second argument is passed as beta to cholmod_l_factorize_p.
 * The factor is returned as a sparse matrix; optional outputs are p
 * (0 if minor == n, otherwise minor+1) and q (the permutation held in the
 * factor).  Without a q output the natural ordering is used.
 */
void mexFunction
(
    int	nargout,
    mxArray *pargout [ ],
    int	nargin,
    const mxArray *pargin [ ]
)
{
    cholmod_common cholmod_state ;
    cholmod_common *cc = &cholmod_state ;
    cholmod_sparse shell, *Ain, *Lmat ;
    cholmod_factor *F ;
    double zero_entry = 0, shift [2], *pout ;
    Long dim, first_bad ;

    /* ---- CHOLMOD start-up and parameters -------------------------------- */

    cholmod_l_start (cc) ;
    sputil_config (SPUMONI, cc) ;

    /* the finished factor is to be a packed, monotonic, simplicial LDL' */
    cc->final_ll = FALSE ;
    cc->final_pack = TRUE ;
    cc->final_monotonic = TRUE ;
    cc->final_super = FALSE ;
    cc->final_asis = FALSE ;

    /* numerically zero entries stay in the symbolic pattern here, so the
     * entries created by supernodal amalgamation must be pruned */
    cc->final_resymbol = TRUE ;

    /* stop early on a non-positive-definite matrix unless p was requested */
    cc->quick_return_if_not_posdef = (nargout < 2) ;

    /* This will disable the supernodal LL', which will be slow. */
    /* cc->supernodal = CHOLMOD_SIMPLICIAL ; */

    /* ---- argument checks ------------------------------------------------ */

    if (!(nargin >= 1 && nargin <= 2 && nargout <= 3))
    {
	mexErrMsgTxt ("usage: [L,p,q] = ldlchol (A,beta)") ;
    }

    dim = mxGetM (pargin [0]) ;

    if (!mxIsSparse (pargin [0]))
    {
	mexErrMsgTxt ("A must be sparse") ;
    }
    if (nargin == 1 && dim != mxGetN (pargin [0]))
    {
	mexErrMsgTxt ("A must be square") ;
    }

    /* wrap the MATLAB matrix, using tril(A) */
    Ain = sputil_get_sparse (pargin [0], &shell, &zero_entry, -1) ;

    /* second component of beta is zero in both modes */
    shift [1] = 0 ;
    if (nargin != 1)
    {
	/* beta given: use all of A, factorizing A*A' */
	Ain->stype = 0 ;
	shift [0] = mxGetScalar (pargin [1]) ;
    }
    else
    {
	/* A only: use its lower part */
	Ain->stype = -1 ;
	shift [0] = 0 ;
    }

    /* no q output requested: keep the natural ordering */
    if (nargout < 3)
    {
	cc->postorder = FALSE ;
	cc->nmethods = 1 ;
	cc->method [0].ordering = CHOLMOD_NATURAL ;
    }

    /* ---- analyze and factorize ------------------------------------------ */

    F = cholmod_l_analyze (Ain, cc) ;
    cholmod_l_factorize_p (Ain, shift, NULL, 0, F, cc) ;

    if (cc->status != CHOLMOD_OK && nargout < 2)
    {
	mexErrMsgTxt ("matrix is not positive definite") ;
    }

    /* ---- factor -> sparse matrix ---------------------------------------- */

    /* remember minor now: the conversion resets F->minor to n */
    first_bad = F->minor ;
    Lmat = cholmod_l_factor_to_sparse (F, cc) ;
    if (Lmat->xtype == CHOLMOD_COMPLEX)
    {
	/* switch from complex to zomplex */
	cholmod_l_sparse_xtype (CHOLMOD_ZOMPLEX, Lmat, cc) ;
    }

    /* ---- outputs -------------------------------------------------------- */

    /* L as a sparse matrix (numerically zero entries may remain) */
    pargout [0] = sputil_put_sparse (&Lmat, cc) ;

    /* p: minor, translated to the MATLAB convention */
    if (nargout > 1)
    {
	pargout [1] = mxCreateDoubleMatrix (1, 1, mxREAL) ;
	pout = mxGetPr (pargout [1]) ;
	if (first_bad == dim)
	{
	    pout [0] = 0 ;
	}
	else
	{
	    pout [0] = (first_bad+1) ;
	}
    }

    /* q: the permutation */
    if (nargout > 2)
    {
	pargout [2] = sputil_put_int (F->Perm, dim, 1) ;
    }

    /* ---- clean up: free the factor and shut CHOLMOD down ---------------- */

    cholmod_l_free_factor (&F, cc) ;
    cholmod_l_finish (cc) ;
    cholmod_l_print_common (" ", cc) ;
    /*
    if (cc->malloc_count != 3 + mxIsComplex (pargout[0])) mexErrMsgTxt ("!") ;
    */
}
Esempio n. 5
0
/*
 * MEX gateway, usage string: [p count] = analyze (A, mode).
 *
 * Runs only the CHOLMOD simplicial analysis on the pattern of A and returns
 * the permutation (L->Perm) and, optionally, the column counts
 * (L->ColCount).  The optional mode string selects what is ordered:
 * 's...' A itself (lower part, the default), 'r...' A*A', 'c...' A'*A.
 * An optional third argument is stored in nmethods; the values -1, -2 and
 * -3 select AMD, METIS and NESDIS respectively as the only ordering.
 */
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    cholmod_common cholmod_state ;
    cholmod_common *cc = &cholmod_state ;
    cholmod_sparse *Apat, shell, *At, *owned ;
    cholmod_factor *F ;
    double zero_entry = 0 ;
    Long dim, use_transpose, first_char ;
    char mode [LEN] ;

    /* ---- CHOLMOD start-up and defaults ---------------------------------- */

    cholmod_l_start (cc) ;
    sputil_config (SPUMONI, cc) ;

    /* simplicial analysis only (L->Perm and L->ColCount) */
    cc->supernodal = CHOLMOD_SIMPLICIAL ;

    /* ---- argument checks ------------------------------------------------ */

    if (nargin < 1 || nargin > 3 || nargout > 2)
    {
	mexErrMsgTxt ("Usage: [p count] = analyze (A, mode)") ;
    }
    if (nargin == 3)
    {
	cc->nmethods = mxGetScalar (pargin [2]) ;
	switch (cc->nmethods)
	{
	    case -1:
		/* AMD only */
		cc->nmethods = 1 ;
		cc->method [0].ordering = CHOLMOD_AMD ;
		cc->postorder = TRUE ;
		break ;

	    case -2:
		/* METIS only */
		cc->nmethods = 1 ;
		cc->method [0].ordering = CHOLMOD_METIS ;
		cc->postorder = TRUE ;
		break ;

	    case -3:
		/* NESDIS only */
		cc->nmethods = 1 ;
		cc->method [0].ordering = CHOLMOD_NESDIS ;
		cc->postorder = TRUE ;
		break ;

	    default:
		/* any other value is used as given */
		break ;
	}
    }

    /* ---- input pattern -------------------------------------------------- */

    Apat = sputil_get_sparse_pattern (pargin [0], &shell, &zero_entry, cc) ;

    /* remember the matrix for freeing only when it is not the local shell */
    if (Apat == &shell)
    {
	owned = NULL ;
    }
    else
    {
	owned = Apat ;
    }

    /* ---- mode: default is the symmetric case using tril(A) -------------- */

    use_transpose = FALSE ;
    Apat->stype = -1 ;

    if (nargin > 1)
    {
	mode [0] = '\0' ;
	if (mxIsChar (pargin [1]))
	{
	    mxGetString (pargin [1], mode, LEN) ;
	}
	first_char = mode [0] ;
	switch (tolower (first_char))
	{
	    case 'r':
		/* unsymmetric case (A*A') */
		use_transpose = FALSE ;
		Apat->stype = 0 ;
		break ;

	    case 'c':
		/* unsymmetric case (A'*A) */
		use_transpose = TRUE ;
		Apat->stype = 0 ;
		break ;

	    case 's':
		/* symmetric case (A) */
		use_transpose = FALSE ;
		Apat->stype = -1 ;
		break ;

	    default:
		mexErrMsgTxt ("analyze: unrecognized mode") ;
	}
    }

    if (Apat->stype && Apat->nrow != Apat->ncol)
    {
	mexErrMsgTxt ("analyze: A must be square") ;
    }

    At = NULL ;
    if (use_transpose)
    {
	/* At = A', and then order At*At' */
	At = cholmod_l_transpose (Apat, 0, cc) ;
	if (At == NULL)
	{
	    mexErrMsgTxt ("analyze failed") ;
	}
	Apat = At ;
    }

    dim = Apat->nrow ;

    /* ---- analyze and order ---------------------------------------------- */

    F = cholmod_l_analyze (Apat, cc) ;

    /* ---- outputs: permutation, then column counts if requested ---------- */

    pargout [0] = sputil_put_int (F->Perm, dim, 1) ;
    if (nargout > 1)
    {
	pargout [1] = sputil_put_int (F->ColCount, dim, 0) ;
    }

    /* ---- clean up ------------------------------------------------------- */

    cholmod_l_free_factor (&F, cc) ;
    cholmod_l_free_sparse (&At, cc) ;
    cholmod_l_free_sparse (&owned, cc) ;
    cholmod_l_finish (cc) ;
    cholmod_l_print_common (" ", cc) ;
    /* if (cc->malloc_count != 0) mexErrMsgTxt ("!") ; */
}
Esempio n. 6
0
/*
 * MEX gateway, usage string: Z = sinv(A), or Z = sinv(LD, 1).
 *
 * One input:  A is factorized here with CHOLMOD (LDL' form, lower part of
 *             A), the factor is copied into a MATLAB sparse array, the
 *             sparse-inverse recursion below is run on it, and the result is
 *             permuted with the factor's ordering (L->Perm).
 * More inputs: pargin[0] is taken to be the factor itself (see the usage
 *             string); it is copied and the recursion is run on the copy
 *             without any permutation.
 *
 * The recursion overwrites the stored lower-triangular values column by
 * column, from the last column to the first; the final section mirrors the
 * lower triangle into a full symmetric sparse matrix returned in
 * pargout[0].
 */
void mexFunction
(
    int	nargout,
    mxArray *pargout [ ],
    int	nargin,
    const mxArray *pargin [ ]
    )
{
  double dummy = 0, beta [2], *C, *Ct, *fil, *Zt, *zt, done=1.0, *zz, dzero=0.0;
  cholmod_sparse Amatrix, *A, *Lsparse ;
  cholmod_factor *L ;
  cholmod_common Common, *cm ;
  Int *It2, *Jt2 ;
  mwIndex l, k2, h, k, i, j, ik, *I, *J, *Jt, *It, lfi, *w, *w2, *r;
  mwSize nnz, m, n;
  mwSignedIndex one=1, lfi_si;
  mxArray *Am, *Bm;
  char *uplo="L", *trans="N";
  

  /* ---------------------------------------------------------------------- */
  /* check the number of inputs before pargin [0] is touched                */
  /* ---------------------------------------------------------------------- */

  if (nargin < 1 || nargin > 3)
    {
      mexErrMsgTxt ("usage: Z = sinv(A), or Z = sinv(LD, 1)") ;
    }

  /* ---------------------------------------------------------------------- */
  /* Only one input. We have to find first the Cholesky factorization.      */ 
  /* start CHOLMOD and set parameters */ 
  /* ---------------------------------------------------------------------- */

  if (nargin == 1) {
    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;
    
    /* convert to packed LDL' when done */
    cm->final_asis = FALSE ;
    cm->final_super = FALSE ;
    cm->final_ll = FALSE ;
    cm->final_pack = TRUE ;
    cm->final_monotonic = TRUE ;

    /* since numerically zero entries are NOT dropped from the symbolic
     * pattern, we DO need to drop entries that result from supernodal
     * amalgamation. */
    cm->final_resymbol = TRUE ;

    cm->quick_return_if_not_posdef = (nargout < 2) ;
  }

  /* This will disable the supernodal LL', which will be slow. */
  /* cm->supernodal = CHOLMOD_SIMPLICIAL ; */
  
  /* ---------------------------------------------------------------------- */
  /* get inputs */
  /* ---------------------------------------------------------------------- */
  
  n = mxGetM (pargin [0]) ;
  m = mxGetM (pargin [0]) ;
  
  if (!mxIsSparse (pargin [0]))
    {
      mexErrMsgTxt ("A must be sparse") ;
    }
  if (n != mxGetN (pargin [0]))
    {
      mexErrMsgTxt ("A must be square") ;
    }

  /* An empty matrix has nothing to invert.  Return an empty sparse matrix  */
  /* here: the code below indexes J[m-1] and starts its loop at m-2, both   */
  /* of which wrap around for m == 0 because the index types are unsigned.  */
  if (m == 0)
    {
      if (nargin == 1) cholmod_l_finish (cm) ;
      pargout [0] = mxCreateSparse (0, 0, 1, mxREAL) ;
      return ;
    }

  /* Only one input. We have to find first the Cholesky factorization.      */
  if (nargin == 1) {
    /* get sparse matrix A, use tril(A)  */
    A = sputil_get_sparse (pargin [0], &Amatrix, &dummy, -1) ; 
    
    A->stype = -1 ;	    /* use lower part of A */
    beta [0] = 0 ;
    beta [1] = 0 ;
    
    /* ---------------------------------------------------------------------- */
    /* analyze and factorize */
    /* ---------------------------------------------------------------------- */
    
    L = cholmod_l_analyze (A, cm) ;
    cholmod_l_factorize_p (A, beta, NULL, 0, L, cm) ;
    
    if (cm->status != CHOLMOD_OK)
      {
	mexErrMsgTxt ("matrix is not positive definite") ;
      }
    
    /* ---------------------------------------------------------------------- */
    /* convert L to a sparse matrix */
    /* ---------------------------------------------------------------------- */

    Lsparse = cholmod_l_factor_to_sparse (L, cm) ;
    if (Lsparse->xtype == CHOLMOD_COMPLEX)
      {
	mexErrMsgTxt ("matrix is complex") ;
      }
    
    /* ---------------------------------------------------------------------- */
    /* Set the sparse Cholesky factorization in Matlab format */
    /* ---------------------------------------------------------------------- */
    /*Am = sputil_put_sparse (&Lsparse, cm) ;
      I = mxGetIr(Am);
      J = mxGetJc(Am);
      C = mxGetPr(Am);
      nnz = mxGetNzmax(Am); */

    It2 = Lsparse->i;
    Jt2 = Lsparse->p;
    Ct = Lsparse->x;
    nnz = (mwSize) Lsparse->nzmax;

    /* copy row indices, column pointers and values into a MATLAB array */
    Am = mxCreateSparse(m, m, nnz, mxREAL) ;
    I = mxGetIr(Am);
    J = mxGetJc(Am);
    C = mxGetPr(Am);
    for (j = 0 ;  j < n+1 ; j++)  J[j] = (mwIndex) Jt2[j];
    for ( i = 0 ; i < nnz ; i++) {
	I[i] = (mwIndex) It2[i];
	C[i] = Ct[i];
    }
    
    cholmod_l_free_sparse (&Lsparse, cm) ;

    /*FILE *out1 = fopen( "output1.txt", "w" );
    if( out1 != NULL )
      fprintf( out1, "Hello %d\n", nnz );
      fclose (out1);*/
    
  } else {
    /* The cholesky factorization is given as an input.      */
    /* We have to copy it into workspace                     */
    It = mxGetIr(pargin [0]);
    Jt = mxGetJc(pargin [0]);
    Ct = mxGetPr(pargin [0]);
    nnz = mxGetNzmax(pargin [0]);
    
    Am = mxCreateSparse(m, m, nnz, mxREAL) ;
    I = mxGetIr(Am);
    J = mxGetJc(Am);
    C = mxGetPr(Am);
    for (j = 0 ;  j < n+1 ; j++)  J[j] = Jt[j];
    for ( i = 0 ; i < nnz ; i++) {
	I[i] = It[i];
	C[i] = Ct[i];
    }    
  }

  /* Evaluate the sparse inverse */

  /* Set the last diagonal element of the sparse inverse.  It is addressed  */
  /* through the column pointer, like every other diagonal below: nnz is    */
  /* the allocated size (nzmax), which may exceed the number of stored      */
  /* entries, so C[nnz-1] is not guaranteed to be that element.             */
  C[J[m-1]] = 1.0/C[J[m-1]];
  fil = mxCalloc((mwSize)1,sizeof(double));
  zt = mxCalloc((mwSize)1,sizeof(double));
  Zt = mxCalloc((mwSize)1,sizeof(double));
  zz = mxCalloc((mwSize)1,sizeof(double));
  /* j is unsigned: the loop ends when j-- wraps around to (mwIndex) -1 */
  for (j=m-2;j!=-1;j--){
    lfi = J[j+1]-(J[j]+1);      /* number of entries below the diagonal in column j */
    
    /* if (lfi > 0) */
    if ( J[j+1] > (J[j]+1) )
      {
	/*	printf("lfi = %u \n ", lfi);
	printf("lfi*double = %u \n", (mwSize)lfi*sizeof(double));
	printf("lfi*lfi*double = %u \n", (mwSize)lfi*(mwSize)lfi*sizeof(double));
	printf("\n \n");
	*/
	
	fil = mxRealloc(fil,(mwSize)lfi*sizeof(double));
	for (i=0;i<lfi;i++) fil[i] = C[J[j]+i+1];                   /* take the j'th lower triangular column of the Cholesky */
	
	zt = mxRealloc(zt,(mwSize)lfi*sizeof(double));              /* memory for the sparse inverse elements to be evaluated */
	Zt = mxRealloc(Zt,(mwSize)lfi*(mwSize)lfi*sizeof(double));  /* memory for the needed sparse inverse elements */
	
	/* Set the lower triangular for Zt */
	k2 = 0;
	for (k=J[j]+1;k<J[j+1];k++){
	  ik = I[k];
	  h = k2;
	  /* Scan column ik only.  Its entries are J[ik] .. J[ik+1]-1; the  */
	  /* bound must be exclusive, otherwise the first entry of the next */
	  /* column is inspected and, for the last column, I is read one    */
	  /* element past its stored entries.                               */
	  for (l=J[ik];l<J[ik+1];l++){
	    if (I[l] == I[ J[j]+h+1 ]){
	      Zt[h+lfi*k2] = C[l];
	      h++;
	    }
	  }
	  k2++;
	}
	
	
	/* evaluate zt = fil*Zt */
	lfi_si = (mwSignedIndex) lfi;
	dsymv(uplo, &lfi_si, &done, Zt, &lfi_si, fil, &one, &dzero, zt, &one);
	
	/* Set the evaluated sparse inverse elements, zt, into C */
	k=lfi-1;
	for (i = J[j+1]-1; i!=J[j] ; i--){
	  C[i] = -zt[k];
	  k--;
	}
	/* evaluate the j'th diagonal of sparse inverse */
	dgemv(trans, &one, &lfi_si, &done, fil, &one, zt, &one, &dzero, zz, &one); 
	C[J[j]] = 1.0/C[J[j]] + zz[0];
      }
    else
      {
	/* evaluate the j'th diagonal of sparse inverse */
	C[J[j]] = 1.0/C[J[j]];	
      }
  }
    
  /* Free the temporary variables */
  mxFree(fil);
  mxFree(zt);
  mxFree(Zt);
  mxFree(zz);

  /* ---------------------------------------------------------------------- */
  /* Permute the elements according to r(q) = 1:n                           */
  /* Done only if the Cholesky was evaluated here                           */
  /* ---------------------------------------------------------------------- */
  if (nargin == 1) {
   
    Bm = mxCreateSparse(m, m, nnz, mxREAL) ;     
    It = mxGetIr(Bm);
    Jt = mxGetJc(Bm);
    Ct = mxGetPr(Bm);                            /* Ct = C(r,r) */ 
    
    r = (mwIndex *) L->Perm;                         /* fill reducing ordering */
    w = mxCalloc(m,sizeof(mwIndex));                 /* column counts of Am */
    
    /* count entries in each column of Bm */
    for (j=0; j<m; j++){
      k = r ? r[j] : j ;       /* column j of Bm is column k of Am */
      for (l=J[j] ; l<J[j+1] ; l++){
	i = I[l];
	ik = r ? r[i] : i ;    /* row i of Bm is row ik of Am */
	w[ max(ik,k) ]++;
      }
    }
    cumsum2(Jt, w, m);
    for (j=0; j<m; j++){
      k = r ? r[j] : j ;             /* column j of Bm is column k of Am */
      for (l=J[j] ; l<J[j+1] ; l++){
	i= I[l];
	ik = r ? r[i] : i ;          /* row i of Bm is row ik of Am */
	It [k2 = w[max(ik,k)]++ ] = min(ik,k);
	Ct[k2] = C[l];
      }
    }
    mxFree(w);
    
    /* ---------------------------------------------------------------------- */
    /* Transpose the permuted (upper triangular) matrix Bm into Am */
    /* (this way we get sorted columns)                            */
    /* ---------------------------------------------------------------------- */
    w = mxCalloc(m,sizeof(mwIndex));                 
    for (i=0 ; i<Jt[m] ; i++) w[It[i]]++;        /* row counts of Bm */
    cumsum2(J, w, m);                            /* row pointers */
    for (j=0 ; j<m ; j++){
      for (i=Jt[j] ; i<Jt[j+1] ; i++){
	I[ l=w[ It[i] ]++ ] = j;
	C[l] = Ct[i];
      }
    }
    mxFree(w);
    mxDestroyArray(Bm);
  }
  
  /* ---------------------------------------------------------------------- */
  /* Fill the upper triangle of the sparse inverse */
  /* ---------------------------------------------------------------------- */
  
  w = mxCalloc(m,sizeof(mwIndex));        /* workspace */
  w2 = mxCalloc(m,sizeof(mwIndex));       /* workspace */
  for (k=0;k<J[m];k++) w[I[k]]++;     /* row counts of the lower triangular */
  for (k=0;k<m;k++) w2[k] = w[k] + J[k+1] - J[k] - 1;   /* column counts of the sparse inverse */
  
  nnz = (mwSize)2*nnz - m;                       /* The number of nonzeros in Z */
  pargout[0] = mxCreateSparse(m,m,nnz,mxREAL);   /* The sparse matrix */
  It = mxGetIr(pargout[0]);
  Jt = mxGetJc(pargout[0]);
  Ct = mxGetPr(pargout[0]);
  
  cumsum2(Jt, w2, m);               /* column starting points */
  for (j = 0 ; j < m ; j++){           /* fill the upper triangular */
    for (k = J[j] ; k < J[j+1] ; k++){
      It[l = w2[ I[k]]++] = j ;	 /* place C(i,j) as entry Ct(j,i) */
      if (Ct) Ct[l] = C[k] ;
    }
  }
  for (j = 0 ; j < m ; j++){           /* fill the lower triangular */
    for (k = J[j]+1 ; k < J[j+1] ; k++){
      It[l = w2[j]++] = I[k] ;         /* place C(j,i) as entry Ct(j,i) */
      if (Ct) Ct[l] = C[k] ;
    }
  }
  
  mxFree(w2);
  mxFree(w);
  
  /* ---------------------------------------------------------------------- */
  /* return to MATLAB */
  /* ---------------------------------------------------------------------- */
  
  /* ---------------------------------------------------------------------- */
  /* free workspace and the CHOLMOD L, except for what is copied to MATLAB */
  /* ---------------------------------------------------------------------- */
  if (nargin == 1) {
    cholmod_l_free_factor (&L, cm) ;
    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
  }
  mxDestroyArray(Am);
  
}
Esempio n. 7
0
 // Forwards to the long-integer CHOLMOD analysis routine and hands back
 // whatever factor pointer it returns.
 static cholmod_factor* analyze(cholmod_sparse* A, cholmod_common* c) {
   cholmod_factor* analyzed = cholmod_l_analyze(A, c);
   return analyzed;
 }
int main(int argc, char* argv[])
{
	const int bufsize = 512;
    	char buffer[bufsize];
	int m,n,S;
	double time_st,time_end,time_avg;
	//omp_set_num_threads(2);
//	printf("\n-----------------\nnumber of threads fired = %d\n-----------------\n",(int)omp_get_num_threads());
	if(argc!=2)
	{
		cout<<"Insufficient arguments"<<endl;
		return 1;
	}
	
	graph G;

	cerr<<"Start reading                    ";
//	time_st=dsecnd();
	G.create_graph(argv[1]);
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cout<<"Success              "<<endl;
//	cerr<<"Reading time                     "<<time_avg<<endl;

	cerr<<"Constructing Matrices            ";
//	time_st=dsecnd();
	G.construct_MNA();
	G.construct_NA();
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cerr<<"Done                 "<<time_avg<<endl;

//	G.construct_sparse_MNA();
	m=G.node_array.size()-1;
	n=G.voltage_edge_id.size();
	
	cout<<endl;
	cout<<"MATRIX STAT:"<<endl;
	cout<<"Nonzero elements:               "<<G.nonzero<<endl;
	cout<<"Number of Rows:                 "<<m+n<<endl;
	cout<<"Nonzero in G:			"<<G.Gnonzero<<endl;
	cout<<"Number of rows in G:		"<<m<<endl;
	cout<<"Nonzero in P: 			"<<G.Pnonzero<<endl;
	cout<<"Number of rows in P:		"<<m<<endl;


//	printf("\n Nonzero = %d", G.nonzero);
//	printf("\n Rows = %d", m+n);

	cout<<"MAT val:		       "<<endl;
	int i,j;

	G.Mat_val[0] += 100;
	G.Gmat[0] +=100;
/*
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gmat[i];
	cout<<endl;
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.GrowIndex[i];
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
*/	SuiteSparse_long *Gnz = (SuiteSparse_long*)calloc(m,sizeof(SuiteSparse_long));
	for(i=0;i<m;i++)
	{
	//	cout<<endl;
		SuiteSparse_long startindex=G.GrowIndex[i];
		SuiteSparse_long endindex=G.GrowIndex[i+1];
		Gnz[i] = endindex - startindex;

//		for(j=startindex;j<endindex;j++)
//			cout<<" "<<G.Gmat[j];
//		cout<<endl;
		
	}


/*	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pmat[i];
	cout<<endl;
	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.ProwIndex[i];
	cout<<endl;
/*	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
	for(i=0;i<m;i++)
	{
		cout<<endl;
		int startindex=G.ProwIndex[i];
		int endindex=G.ProwIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Pmat[j];
		cout<<endl;
		
	}
	
/*	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.Mat_val[i];
	cout<<endl;
	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.columns[i];
	cout<<endl;	
	for(i=0;i<m+n+1;i++)
		cout<<" "<<G.rowIndex[i];
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.b[i]);
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.x[i]);
	cout<<endl;	
	
	
	for(i=0;i<m+n;i++)
	{
		cout<<endl;
		int startindex=G.rowIndex[i];
		int endindex=G.rowIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Mat_val[j];
		cout<<endl;
		
	}
*/
/*	for (i=0;i<m+n+1;i++)
	{
		//cout<<endl;
		if(G.rowIndex[i]==G.rowIndex[i+1])
			break;
		
		for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			if(G.Mat_val[j]>10)
				cout<<G.Mat_val[j]<<"\t";
		}
		//cout<<endl;
		/*for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			cout<<G.columns[j]<<"\t";
		}
		//cout<<endl;
	}
	cout<<endl;
*/

//printing the matrix
	printf("\n Fine till here");
	printf("\n");
//	int* rowmIndex=(int*)calloc(m+1,sizeof(int));
	printf("\n Fine till here");
	printf("\n");
	//int rowmIndex[5]={1,2,3,4,5};
/*	for(i=0;i<m+1;i++)
	{
		rowmIndex[i]=G.rowIndex[i];
		printf(" %d", rowmIndex[i]);
	}
*/
	printf("\n Allocating GPU memory\n");
	cudaDeviceReset();
	size_t free, total;
	cudaMemGetInfo(&free, &total);
	printf("\n Free Mem = %lf MB, Total mem = %lf MB\n", (double)(free)/(1024*1024), (double)(total)/(1024*1024));


	double *dev_csrValA, *dev_b, *dev_x;
	int *dev_csrRowIdxA, *dev_csrColA;

	double *dev_GcsrVal, *dev_b1, *dev_x1;
	double *dev_PcsrVal, *dev_b2, *dev_x2;
	
	int *dev_GcsrRowIdx, *dev_PcsrRowIdx, *dev_GcsrCol, *dev_PcsrCol;
	

	
	cudaMalloc((void**)&dev_PcsrVal, G.Pnonzero*sizeof(double));
	cudaMalloc((void**)&dev_PcsrRowIdx, (m+1)*sizeof(int));
	cudaMalloc((void**)&dev_PcsrCol, G.Pnonzero*sizeof(int));


	cudaMalloc((void**)&dev_b1, (m)*sizeof(double));
	cudaMalloc((void**)&dev_b2, n*sizeof(double));
	cudaMalloc((void**)&dev_x1, m*sizeof(double));
	cudaMalloc((void**)&dev_x2, n*sizeof(double));

	cudaMemcpy(dev_b1, G.b1, (m)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x1, G.x1, (m)*sizeof(double), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_PcsrVal, G.Pmat, G.Pnonzero*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_b2, G.b2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x2, G.x2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrRowIdx, G.ProwIndex, (m+1)*sizeof(int), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrCol, G.Pcolumns, (G.Pnonzero)*sizeof(int), cudaMemcpyHostToDevice);


	/* Matrix has been created and stored in CSR format.
	However, CHOLMOD requires CSC format. Since our matrix is symmetric positive definite, we can simply swap 
	csrColA with csrRowIdx and vice versa
	*/

	/* Starting the CHOLMOD routine now*/
	printf("\n Initiating CHOLMOD\n");
	cholmod_sparse *A, *P;
	cholmod_dense *x, *b, *r, *midvec;
	cholmod_factor *L;
	cholmod_common *Common, cm;
	Common = &cm;
	cholmod_l_start(Common);
//	&Common->useGPU=1;
	printf("\n m = %d, G.Gnonzero = %d\n", m, G.Gnonzero);	


	
	cholmod_sparse *C = cholmod_l_allocate_sparse((size_t)(m), (size_t)(m), (size_t)(G.Gnonzero), 1, 0, 1, 1, Common); 
//	P = cholmod_l_allocate_sparse((size_t)(m), (size_t)(n), (size_t)(G.Pnonzero), 1, 0, 0, 1, Common); 
//	printf("\n Allocated \n");

	C->itype = CHOLMOD_LONG;	
//	printf("\n Itype \n");
	C->p = &G.GrowIndex[0];
//	printf("\n Columns \n");
	C->nz = &Gnz[0];
//	printf("\n Rows \n");
	C->i = &G.Gcolumns[0];
	C->dtype = 0;
	C->x = &G.Gmat[0];
	 
/*	P->itype = CHOLMOD_LONG;
	P->p = &G.ProwIndex[0];
	P->nz = &Pnz[0];
	P->i = &G.Pcolumns[0];
	P->dtype = 0;
	P->x = &G.Pmat[0]; 
*/	 

	b = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
	b->dtype=0;
	b->x = &G.b1[0];
	b->xtype = 1;

	printf("\n CHOLMOD manually set\n");
	cholmod_l_print_sparse(C, "A", Common);
	cholmod_l_print_dense(b, "b", Common);


	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);

	cudaEventRecord(start, 0);

	L = cholmod_l_analyze(C, Common);
	printf("\n Analysis: Flops: %g \t lnz: %g\n", Common->fl, Common->lnz);
	cholmod_l_factorize(C, L, Common);
	x = cholmod_l_solve(CHOLMOD_A, L, b, Common);
	
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	
	

	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	printf("\n Time : %.6f secs :\n", elapsedTime);
	
	cholmod_l_print_dense(x, "X", Common);
	
	double *x1_mod = (double*)x->x;
	
	cudaMemcpy(dev_x1, x1_mod, m*sizeof(double), cudaMemcpyHostToDevice);
	
	cusparseStatus_t cuSparseStatus;
	cusparseHandle_t cuspHandle;
	cuSparseStatus = cusparseCreate(&cuspHandle);

	cusparseMatDescr_t descrP;
	cusparseCreateMatDescr(&descrP);

	cusparseSetMatType(descrP, CUSPARSE_MATRIX_TYPE_GENERAL);	
	cusparseSetMatIndexBase(descrP, CUSPARSE_INDEX_BASE_ZERO);
	
	
	double *dev_res1, *dev_simple;
	double *res1 = (double*)calloc(n,sizeof(double));
	cudaMalloc((void**)&dev_res1, n*sizeof(double));
	cudaMalloc((void**)&dev_simple, n*sizeof(double));
	
	const double alpha = 1.0, beta=0.0;
	//alpha = 1.0;
	//beta = 0.0;
	
	//solving P^T * G^-1 * b1 Result stored in dev_res1
	
	cuSparseStatus = cusparseDcsrmv(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_x1, &beta, dev_res1);
		
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
/*		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
		for(i=0;i<n;i++)
		{
			printf("\nres1[%d] = %.8f", i, res1[i]);
		}
		printf("\n P^T * G^-1 * b1 done! Vector stored in res1");
*/	}
	else
	{
		printf("\n P^T * G^-1 * b1 failed\n");
		exit(1);
	}
	
	const double alphaneg = -1.0;
		
		//Solving P^T * G^-1 * b1 - b2 ; Result stored in dev_res1
	
		
	cublasStatus_t cuBlasStatus;
	cublasHandle_t cubHandle;
	cuBlasStatus = cublasCreate(&cubHandle);
		
	cuBlasStatus = cublasDaxpy(cubHandle, n, &alphaneg, dev_b2, 1, dev_res1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
//		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
//		for(i=0;i<n;i++)
//		{
//			printf("\nres1[%d] = %.8f", i, res1[i]);
//		}
		printf("\n res1 = res1 - b2 done\n");
		 
	}
	else
	{
		printf("\n res1 = res1 - b2 failed\n");
	}
		
	
	
	
	///NOW COMPUTING G^-1 * P
	
	
	int k = 0;
	int breakloop=0;
	
	double **midMat = (double**)malloc(m*sizeof(double*));
	for(i=0;i<m;i++)
	{
		midMat[i] = (double*)calloc(n,sizeof(double));
	}
	
	cudaEventRecord(start, 0);
	
	for(i=0;i<n;i++)
	{
		breakloop = 0;
		double *vect = (double*)calloc(m,sizeof(double*));

		for(j=0;j<m;j++)
		{
			int startin = G.ProwIndex[j];
			int endin = G.ProwIndex[j+1];
			if(startin == endin)
				continue;
				
			k = startin;

			while(k<endin)
			{

				if(G.Pcolumns[k] == i)
				{	
					vect[j] = G.Pmat[k];
					breakloop=1;
					break;
				
				}
				k++;
			}
			if(breakloop == 1)
			{
				break;
			}
		}
		
		midvec = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
		midvec->dtype=0;
		midvec->x=&vect[0];
		midvec->xtype = 1;
		
		cholmod_dense *res2;
		
		res2 = cholmod_l_solve(CHOLMOD_A, L, midvec, Common);

		
		double *re = (double*)res2->x;
		
//		printf("\n vector %d is:\n", i);
		int i1, j1, k1;
//		for(j1=0;j1<m;j1++)
//		{
//			midmat2flat[i+j1*n] = re[j1];
//			printf(" %lf", re[j1]);
//		}
//		printf("\n");
		for(i1=0;i1<m;i1++)
		{
			midMat[i1][i] = re[i1];
		}
		
		cholmod_l_free_dense(&midvec, Common);
			
	}
	
/*	printf("\n Midmat = \n");
	for(i=0;i<m;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf", midMat[i][j]);
		}
		printf("\n");
	} 
*/
	double *midMatflat = (double*)calloc((m*n),sizeof(double));
	double *dev_midMat;
	double *dev_solut;
	int counter = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<m;j++)
		{
			midMatflat[counter] = midMat[j][i];
			counter++; 
		}
	}
	
	cudaMalloc((void**)&dev_midMat, m*n*sizeof(double));
	cudaMalloc((void**)&dev_solut, n*n*sizeof(double));
	
	cudaMemcpy(dev_midMat, midMatflat, m*n*sizeof(double), cudaMemcpyHostToDevice);
		
	//Solving P^T * midMat; Result stored in dev_solut
		
	cuSparseStatus = cusparseDcsrmm(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_midMat, m, &beta, dev_solut, n);
	
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
		printf("\n Solved P^T * G^-1 * P. Result stored in solut\n");
		
	}  
	else
	{
		printf("\n  Failed to Solve P^T * G^-1 * P \n");
		exit(1);
	}

/*	double *matGflat = (double*)calloc(n*n,sizeof(double));
	cudaMemcpy(matGflat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
	counter = 0;
	printf("\nBefore LU starts\n");
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf ", matGflat[counter]);
			counter++;
		}
		printf("\n");
	}
	printf("\n");
*/	
	cusolverStatus_t cuSolverStatus;
	
	
	cusolverDnHandle_t cudenHandle;
	cuSolverStatus = cusolverDnCreate(&cudenHandle);
	int Lwork = 0;
	cuSolverStatus = cusolverDnDgetrf_bufferSize(cudenHandle, n, n, dev_solut, n, &Lwork);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Buffer works\n Lwork = %d\n", Lwork);
	}
	else
	{
		exit(1);	
	}
	double *dev_Workspace;
	int *dev_Ipiv, *dev_Info;
	
	cudaMalloc((void**)&dev_Workspace, Lwork*sizeof(double));
	cudaMalloc((void**)&dev_Ipiv, n*sizeof(int));
	cudaMalloc((void**)&dev_Info, sizeof(int));
	
	//Calculating LU for dev_solut
//	double *nnmat = (double*)calloc(n*n,sizeof(double));
//	cudaMemcpy(nnmat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
//	cuSolverStatus = cusolverDnDgetrfHost(cudenHandle, n, n, 

	cuSolverStatus = cusolverDnDgetrf(cudenHandle, n, n, dev_solut, n, dev_Workspace, dev_Ipiv, dev_Info);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n solut has be defactorized into L and U. dev_Ipiv * solut = L * U\n");
	}
	else
	{
	
		printf("\n Unable to defactorize solut into LU\n");
		exit(1);	
	}
	
	//solving dev_solut * x = dev_res1. Result stored in dev_res1
	
	cuSolverStatus = cusolverDnDgetrs(cudenHandle, CUBLAS_OP_N, n, 1, dev_solut, n, dev_Ipiv, dev_res1, n, dev_Info);
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Solution obtained for x2 \n");
	}
	else
	{
		printf("\n LU decomposition obtained by LU solver failed\n");
	}

/*	cudaMemcpy(G.x2, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
	printf("\n x2 = \n");
	for(i=0;i<n;i++)
	{
		printf("\n x2[%d] = %lf", i, G.x2[i]); 
	}
*/	
	
	double *dev_dummy;
	cudaMalloc((void**)&dev_dummy, m*sizeof(double));
	cudaMemset(dev_dummy, 0.0, m*sizeof(double));
	
	printf("\n Starting solving for x1 \n");
	//Solving for x1
	
		//Solving G^-1 * P * x2;  G^-1 * P is stored in midMat
	cuBlasStatus = cublasDgemv(cubHandle, CUBLAS_OP_N, m, n, &alpha, dev_midMat, m, dev_res1, 1, &beta, dev_dummy, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		double *toprint = (double*)calloc(m,sizeof(double));
		cudaMemcpy(toprint, dev_dummy, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n Intermediate vector :\n");
		for(i=0;i<m;i++)
		{
			printf("\ndummy[%d] = %lf", i, toprint[i]);
		}
*/		printf("\n midmat * x2 obtained. Stored in dummy\n");
	}
	else
	{
		printf("\n Failed to obtain midmat * x2\n");
	}
	
	cuBlasStatus = cublasDaxpy(cubHandle, m, &alphaneg, dev_dummy, 1, dev_x1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		cudaMemcpy(G.x1, dev_x1, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n x1 = \n");
		for(i=0;i<m;i++)
		{
			printf("\n x1[%d] = %.15f", i, G.x1[i]);
		}
*/		printf("\n x1 obtained");
	}
	else
	{
		printf("\n Failed to obtain x1");
	}

	printf("\n Solver finished its work\n");

/*		cudaEventRecord(stop, 0);
		cudaEventSynchronize(stop);

		cudaEventElapsedTime(&elapsedTime, start, stop);
		printf("\n Time: %.6f msecs :\n", elapsedTime);
*/








	
	
	
	
	

	cholmod_l_finish(Common);
	return 0;

}