Exemple #1
0
SEXP Csparse_dense_prod(SEXP a, SEXP b)
{
    CHM_SP cha = AS_CHM_SP(a);
    SEXP b_M = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN chb = AS_CHM_DN(b_M);
    CHM_DN chc = cholmod_l_allocate_dense(cha->nrow, chb->ncol, cha->nrow,
					chb->xtype, &c);
    SEXP dn = PROTECT(allocVector(VECSXP, 2));
    double one[] = {1,0}, zero[] = {0,0};
    int nprot = 2;
    R_CheckStack();
    /* Tim Davis, please FIXME:  currently (2010-11) *fails* when  a  is a pattern matrix:*/
    if(cha->xtype == CHOLMOD_PATTERN) {
	/* warning(_("Csparse_dense_prod(): cholmod_sdmult() not yet implemented for pattern./ ngCMatrix" */
	/* 	  " --> slightly inefficient coercion")); */

	// This *fails* to produce a CHOLMOD_REAL ..
	// CHM_SP chd = cholmod_l_copy(cha, cha->stype, CHOLMOD_REAL, &c);
	// --> use our Matrix-classes
	SEXP da = PROTECT(nz2Csparse(a, x_double)); nprot++;
	cha = AS_CHM_SP(da);
    }
    cholmod_l_sdmult(cha, 0, one, zero, chb, chc, &c);
    SET_VECTOR_ELT(dn, 0,	/* establish dimnames */
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 0)));
    SET_VECTOR_ELT(dn, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_M, Matrix_DimNamesSym), 1)));
    UNPROTECT(nprot);
    return chm_dense_to_SEXP(chc, 1, 0, dn);
}
 Dense :: Dense( Common& _common, int _m, int _n, int _xtype )
 : common( _common ),
   m( _m ),
   n( _n ),
   xtype( _xtype ),
   data( NULL ),
   rData( NULL ),
   iData( NULL )
 {
    int d = m; // leading dimension
    data = cholmod_l_allocate_dense( m, n, d, xtype, common );
    rData = (double*) data->x;
    if( xtype == CHOLMOD_ZOMPLEX )
    {
       iData = (double*) data->z;
    }
 }
 DenseMatrix :: DenseMatrix( int m_, int n_, int xtype_ )
 // initialize an mxn matrix of doubles
 // xtype is either DDG::Real or DDG::Complex
 : m( m_ ),
   n( n_ ),
   xtype( xtype_ ),
   data( NULL ),
   rData( NULL ),
   iData( NULL )
 {
    int d = m; // leading dimension
    data = cholmod_l_allocate_dense( m, n, d, xtype, context );
    rData = (double*) data->x;
    if( xtype == Complex )
    {
       iData = (double*) data->z;
    }
 }
Exemple #4
0
   cholmod_dense* DenseMatrix<Real> :: to_cholmod( void )
   // returns pointer to underlying cholmod_dense data structure
   {
      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
         cData = NULL;
      }

      int d = m; // leading dimension
      cData = cholmod_l_allocate_dense( m, n, d, CHOLMOD_REAL, context );
      double* x = (double*) cData->x;

      for( int i = 0; i < m*n; i++ )
      {
         x[i] = data[i];
      }

      return cData;
   }
Exemple #5
0
SEXP Csparse_dense_crossprod(SEXP a, SEXP b)
{
    CHM_SP cha = AS_CHM_SP(a);
    SEXP b_M = PROTECT(mMatrix_as_dgeMatrix(b));
    CHM_DN chb = AS_CHM_DN(b_M);
    CHM_DN chc = cholmod_l_allocate_dense(cha->ncol, chb->ncol, cha->ncol,
					chb->xtype, &c);
    SEXP dn = PROTECT(allocVector(VECSXP, 2)); int nprot = 2;
    double one[] = {1,0}, zero[] = {0,0};
    R_CheckStack();
    // -- see Csparse_dense_prod() above :
    if(cha->xtype == CHOLMOD_PATTERN) {
	SEXP da = PROTECT(nz2Csparse(a, x_double)); nprot++;
	cha = AS_CHM_SP(da);
    }
    cholmod_l_sdmult(cha, 1, one, zero, chb, chc, &c);
    SET_VECTOR_ELT(dn, 0,	/* establish dimnames */
		   duplicate(VECTOR_ELT(GET_SLOT(a, Matrix_DimNamesSym), 1)));
    SET_VECTOR_ELT(dn, 1,
		   duplicate(VECTOR_ELT(GET_SLOT(b_M, Matrix_DimNamesSym), 1)));
    UNPROTECT(nprot);
    return chm_dense_to_SEXP(chc, 1, 0, dn);
}
   void Dense :: vertcat( const Dense& A, const Dense& B )
   {
      assert( A.n == B.n );
      assert( A.xtype == B.xtype );

      m = A.m + B.m;
      n = A.n;
      xtype = A.xtype;

      if( data ) { cholmod_l_free_dense( &data, common ); data = NULL; }
      data = cholmod_l_allocate_dense( m, n, m, xtype, common );
      rData = (double*) data->x;
      if( xtype == CHOLMOD_ZOMPLEX )
      {
         iData = (double*) data->z;
      }

      for( int i = 0; i < A.m; i++ )
      for( int j = 0; j < A.n; j++ )
      {
         rData[i+m*j] = A.rData[i+A.m*j];
         if( xtype == CHOLMOD_ZOMPLEX )
         {
            iData[i+m*j] = A.iData[i+A.m*j];
         }
      }

      for( int i = 0; i < B.m; i++ )
      for( int j = 0; j < B.n; j++ )
      {
         rData[(i+A.m)+m*j] = B.rData[i+B.m*j];
         if( xtype == CHOLMOD_ZOMPLEX )
         {
            iData[(i+A.m)+m*j] = B.iData[i+B.m*j];
         }
      }
   }
Exemple #7
0
   cholmod_dense* DenseMatrix<Quaternion> :: to_cholmod( void )
   // returns pointer to underlying cholmod_dense data structure
   {
      assert( nColumns() == 1 );

      if( cData )
      {
         cholmod_l_free_dense( &cData, context );
         cData = NULL;
      }

      cData = cholmod_l_allocate_dense( m*4, 1, m*4, CHOLMOD_REAL, context );
      double* x = (double*) cData->x;

      for( int i = 0; i < m*n; i++ )
      {
         for( int k = 0; k < 4; k++ )
         {
            x[i*4+k] = data[i][k];
         }
      }

      return cData;
   }
Exemple #8
0
int main() {
  /* Time Recording Variables */
  int now_s, now_u;
  struct rusage usage;
  double time_now, time_past;
  
  /* x, y, z and global indices */
  int i, j, k, m, n, nodes;
  /* Connectivity enties */
  int m_above, m_below, m_left, m_right, m_front, m_back;
  double c_above, c_below, c_left, c_right, c_front, c_back, c_self;
  
  /* Creat cholmod object */
  cholmod_common Common, *cc;
  /* Compress column form sparse matrix */
  cholmod_sparse *S, *ST;
  /* forcing function on input; voltages after solving */
  cholmod_dense *b, *v0, *r;  
  size_t *Si, *Sp, *Snz, *bi, *bj;
  double *Sx, *bx, *v0x;
 
  /* store the sparse matrix */
  FILE *store;
  char filename[100];
  
  /* start CHOLMOD */
  cc = &Common;
  cholmod_l_start(cc);
  
  /* initialize timing variables*/ 
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_past = now_s + now_u  / 1.e6;
  time_now = time_past;
  
  /* total nodes in the grid */
  nodes = IMAX * JMAX * KMAX;
  
  /* Allocate space for connectivity matrix and forcing vector. */
  S = cholmod_l_allocate_sparse(nodes, nodes, 7 * nodes, 
				0, 0, 0, CHOLMOD_REAL, cc);

  b = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  bx = b->x;

  v0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  v0x = v0->x;



  /*================================================================*/
  /*===============  make connectivity matrix ======================*/
  /*================================================================*/
  Si = (size_t *) S->i;
  Sp = (size_t *) S->p;
  Sx = (double *) S->x;
  Snz = (size_t *) S->nz;
  n = 0;
  Sp[0] = 0;
  for (k = 0; k < KMAX; k++) {
    for (j = 0; j < JMAX; j++) {
      for (i = 0; i < IMAX; i++) {
	/* Global index in x-fastest order*/
	m = k * IMAX * JMAX + j * IMAX + i;
	Sp[m + 1] = Sp[m];
	v0x[m] = (k + 1.0) / (KMAX + 1.0);

	/* Six coef.s of every node */
	c_below = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j + 0.5, k - 0.5) +
		   mepr(i + 0.5, j + 0.5, k - 0.5)) / 4.0;

	c_front = (mepr(i - 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i - 0.5, j - 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_left = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j - 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i - 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_right = (mepr(i + 0.5, j + 0.5, k - 0.5) + 
		   mepr(i + 0.5, j - 0.5, k - 0.5) + 
		   mepr(i + 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j - 0.5, k + 0.5)) / 4.0;

	c_back = (mepr(i - 0.5, j + 0.5, k - 0.5) + 
		  mepr(i + 0.5, j + 0.5, k - 0.5) + 
		  mepr(i - 0.5, j + 0.5, k + 0.5) +
		  mepr(i + 0.5, j + 0.5, k + 0.5)) / 4.0;

	c_above = (mepr(i - 0.5, j - 0.5, k + 0.5) + 
		   mepr(i + 0.5, j - 0.5, k + 0.5) + 
		   mepr(i - 0.5, j + 0.5, k + 0.5) +
		   mepr(i + 0.5, j + 0.5, k + 0.5))/4.0;

	/* Self term. */
	c_self = -(c_above + c_below + c_left + c_right + c_front + c_back);
	Si[n] = m; Sx[n] = c_self; n++; Sp[m + 1]++;

	/* Node below. Ensure not on bottom face */
	if (k != 0) {
	  m_below = m - IMAX * JMAX;
 	  Si[n] = m_below; Sx[n] = c_below; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_below * VBOT;
	}

	/* Node front.  Ensure not on front face. */
	if (j != 0) {
	  m_front = m - IMAX;
	} else {
	  m_front = m + (JMAX - 1) * IMAX;
	}
	Si[n] = m_front; Sx[n] = c_front; n++; Sp[m + 1]++;

	/* Node to left.  Ensure not on left face. */
	if (i != 0) {
	  m_left = m - 1;
	} else {
	  m_left = m + IMAX -1;
	}
	Si[n] = m_left; Sx[n] = c_left; n++; Sp[m + 1]++;	   

	/* Node to right.  Ensure not on right face. */
	if (i != IMAX - 1) {
	  m_right = m + 1;
	} else{
	  m_right = m - IMAX + 1;
	}
	Si[n] = m_right; Sx[n] = c_right; n++; Sp[m + 1]++;

	/* Node back.  Ensure not on back face. */
	if (j != JMAX - 1) {
	  m_back = m + IMAX;
	} else {
	  m_back = m - (JMAX - 1) * IMAX;
	}
	Si[n] = m_back; Sx[n] = c_back; n++; Sp[m + 1]++;

	/* Node top. Ensure not on top face */
	if (k != KMAX - 1) {
	  m_above = m + IMAX * JMAX;
 	  Si[n] = m_above; Sx[n] = c_above; n++; Sp[m + 1]++;
	} else {
	  bx[m] = -c_above * VTOP;
	}

	Snz[m] = Sp[m + 1] - Sp[m];
      }
    }
  }
  /*====================================================================*/
  /*===================Done creating connectivity matrix.===============*/
  /*====================================================================*/

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished creating connectivity matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);
  time_past = time_now;

  /* Print all three matrixes */
  cholmod_l_print_sparse(S, "S", cc);
  cholmod_l_print_dense(b, "b", cc);
  cholmod_l_print_dense(v0, "v0", cc);

  /* Allocate residual vector */
  r = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);

  /* The Preconditioned Conjugate Gradient Method */
  /* Calculate the first residual value */
  double one[2], zero[2], minusone[2];
  zero[0] = 0.0;
  zero[1] = 0.0;
  one[0] = 1.0;
  one[1] = 0.0;
  minusone[0] = -1.0;
  minusone[1] = 0.0;

  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("Initial 2-norm = %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Initial 1-norm = %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Initial 0-norm = %f\n", cholmod_l_norm_dense(r, 0, cc));
  

  /* The iteration */
  double rho1, rho0, beta = 0.0, alpha = 0.0;
  cholmod_dense *p1, *p0, *q;
  double *p1x, *p0x, *qx, *rx;
  /*
  p1 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  p0 = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);
  q = cholmod_l_allocate_dense(nodes, 1, nodes, CHOLMOD_REAL, cc);  
  */
  p1 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p0 = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  q = cholmod_l_zeros(nodes, 1, CHOLMOD_REAL, cc);
  p1x = (double *) p1->x;
  p0x = (double *) p0->x;
  qx = (double *) q->x;
  rx = (double *) r->x;

  int iter;
  for (iter = 0; iter < MAX_ITER; iter++) {
    if (cholmod_l_norm_dense(r, 0, cc) < 1e-10)
      break;
    
    rho1 = cholmod_l_norm_dense(r, 2, cc);
    rho1 = rho1 * rho1;
    
    if (iter == 0) {
      cholmod_l_copy_dense2(r, p1, cc);
    } else {
      beta = rho1 / rho0;
      for (i = 0; i < nodes; i++) {
	p1x[i] = rx[i] + beta * p0x[i];
      }
    }
   
    cholmod_l_sdmult(S, 0, one, zero, p1, q, cc);
    alpha = 0;
    for (i = 0; i < nodes; i++) {
      alpha += p1x[i] * qx[i];
    }
    alpha = rho1 / alpha;

    for (i = 0; i < nodes; i++) {
      v0x[i] += alpha * p1x[i];
      rx[i] -= alpha * qx[i];
    }
    /*
    printf("iter = %d\n: rho1 = %f, rho0 = %f, alpha = %f, beta = %f\n", 
	   iter, rho1, rho0, alpha, beta);
    */
    cholmod_l_copy_dense2(p1, p0, cc);
    rho0 = rho1;
  }
  
  cholmod_l_copy_dense2(b, r, cc);
  cholmod_l_sdmult(S, 0, minusone, one, v0, r, cc);
  printf("After %d iterations:\n", iter);
  printf("Final 2-norm: %f\n", cholmod_l_norm_dense(r, 2, cc));
  printf("Final 1-norm: %f\n", cholmod_l_norm_dense(r, 1, cc));
  printf("Final 0-norm: %f\n", cholmod_l_norm_dense(r, 0, cc));
  /* sort sparse matrix and store it 
  cholmod_l_sort(S, cc);
  S->stype = 0;
  */

  /* Check if S is symmetric
  ST = cholmod_l_transpose(S, 2, cc);
  store = fopen("LpT.dat", "w");
  cholmod_l_write_sparse(store, ST, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&ST, cc);
  */
  
  /* store the sparse matrix   */
  /*
  store = fopen("Laplace.dat", "w");
  cholmod_l_write_sparse(store, S, NULL, NULL, cc);
  fclose(store);
  cholmod_l_free_sparse(&S, cc);
  */

  /* store the force vector  */
  /*
  store = fopen("Force.dat", "w");
  cholmod_l_write_dense(store, b, NULL, cc);
  fclose(store);
  cholmod_l_free_dense(&b,cc);
  */

  /* store the guess vector   */
  store = fopen("Voltage.dat", "w");
  cholmod_l_write_dense(store, v0, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&v0, cc);

  /* store the residual vector  */
  store = fopen("Residual.dat", "w");
  cholmod_l_write_dense(store, r, NULL, cc);
  fclose(store);

  cholmod_l_free_dense(&r, cc);

  /* report time*/
  getrusage(0, &usage);
  now_s = usage.ru_utime.tv_sec;
  now_u = usage.ru_utime.tv_usec;
  time_now = now_s + now_u / 1.e6;
  printf("\nFinished writing matrix\n"
	 "  Incremental time %f\n"
	 "  Running time %f\n", time_now - time_past, time_now);

  cholmod_l_finish(cc);
  return 0;
}
void mexFunction
(
    int nargout,
    mxArray *pargout [ ],
    int nargin,
    const mxArray *pargin [ ]
)
{
    double dummy = 0, one [2] = {1,0}, zero [2] = {0,0} ;
    cholmod_sparse *S, Smatrix ;
    cholmod_dense *F, Fmatrix, *C ;
    cholmod_common Common, *cm ;
    Long srow, scol, frow, fcol, crow, transpose ; 

    /* ---------------------------------------------------------------------- */
    /* start CHOLMOD and set parameters */ 
    /* ---------------------------------------------------------------------- */

    cm = &Common ;
    cholmod_l_start (cm) ;
    sputil_config (SPUMONI, cm) ;

    /* ---------------------------------------------------------------------- */
    /* check inputs */
    /* ---------------------------------------------------------------------- */

    if (nargout > 1 || nargin < 2 || nargin > 3)
    {
	mexErrMsgTxt ("Usage: C = sdmult (S,F,transpose)") ; 
    }

    srow = mxGetM (pargin [0]) ;
    scol = mxGetN (pargin [0]) ;
    frow = mxGetM (pargin [1]) ;
    fcol = mxGetN (pargin [1]) ;

    transpose = !((nargin == 2) || (mxGetScalar (pargin [2]) == 0)) ;

    if (frow != (transpose ? srow : scol))
    {
	mexErrMsgTxt ("invalid inner dimensions") ;
    }

    if (!mxIsSparse (pargin [0]) || mxIsSparse (pargin [1]))
    {
	mexErrMsgTxt ("sdmult (S,F): S must be sparse, F must be full") ;
    }

    /* ---------------------------------------------------------------------- */
    /* get S and F */
    /* ---------------------------------------------------------------------- */

    S = sputil_get_sparse (pargin [0], &Smatrix, &dummy, 0) ;
    F = sputil_get_dense  (pargin [1], &Fmatrix, &dummy) ;

    /* ---------------------------------------------------------------------- */
    /* C = S*F or S'*F */
    /* ---------------------------------------------------------------------- */

    crow = transpose ? scol : srow ;
    C = cholmod_l_allocate_dense (crow, fcol, crow, F->xtype, cm) ;
    cholmod_l_sdmult (S, transpose, one, zero, F, C, cm) ;
    pargout [0] = sputil_put_dense (&C, cm) ;

    /* ---------------------------------------------------------------------- */
    /* free workspace and the CHOLMOD L, except for what is copied to MATLAB */
    /* ---------------------------------------------------------------------- */

    cholmod_l_finish (cm) ;
    cholmod_l_print_common (" ", cm) ;
    /*
    if (cm->malloc_count != (mxIsComplex (pargout [0]) + 1)) mexErrMsgTxt ("!");
    */
}
Exemple #10
0
 static cholmod_dense* allocate_dense(size_t nrow, size_t ncol, size_t d,
     int xtype, cholmod_common* c) {
   return cholmod_l_allocate_dense(nrow, ncol, d, xtype, c);
 }
int main(int argc, char* argv[])
{
	const int bufsize = 512;
    	char buffer[bufsize];
	int m,n,S;
	double time_st,time_end,time_avg;
	//omp_set_num_threads(2);
//	printf("\n-----------------\nnumber of threads fired = %d\n-----------------\n",(int)omp_get_num_threads());
	if(argc!=2)
	{
		cout<<"Insufficient arguments"<<endl;
		return 1;
	}
	
	graph G;

	cerr<<"Start reading                    ";
//	time_st=dsecnd();
	G.create_graph(argv[1]);
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cout<<"Success              "<<endl;
//	cerr<<"Reading time                     "<<time_avg<<endl;

	cerr<<"Constructing Matrices            ";
//	time_st=dsecnd();
	G.construct_MNA();
	G.construct_NA();
//	time_end=dsecnd();
//	time_avg = (time_end-time_st);
//	cerr<<"Done                 "<<time_avg<<endl;

//	G.construct_sparse_MNA();
	m=G.node_array.size()-1;
	n=G.voltage_edge_id.size();
	
	cout<<endl;
	cout<<"MATRIX STAT:"<<endl;
	cout<<"Nonzero elements:               "<<G.nonzero<<endl;
	cout<<"Number of Rows:                 "<<m+n<<endl;
	cout<<"Nonzero in G:			"<<G.Gnonzero<<endl;
	cout<<"Number of rows in G:		"<<m<<endl;
	cout<<"Nonzero in P: 			"<<G.Pnonzero<<endl;
	cout<<"Number of rows in P:		"<<m<<endl;


//	printf("\n Nonzero = %d", G.nonzero);
//	printf("\n Rows = %d", m+n);

	cout<<"MAT val:		       "<<endl;
	int i,j;

	G.Mat_val[0] += 100;
	G.Gmat[0] +=100;
/*
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gmat[i];
	cout<<endl;
	for(i=0;i<G.Gnonzero;i++)
		cout<<" "<<G.Gcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.GrowIndex[i];
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
*/	SuiteSparse_long *Gnz = (SuiteSparse_long*)calloc(m,sizeof(SuiteSparse_long));
	for(i=0;i<m;i++)
	{
	//	cout<<endl;
		SuiteSparse_long startindex=G.GrowIndex[i];
		SuiteSparse_long endindex=G.GrowIndex[i+1];
		Gnz[i] = endindex - startindex;

//		for(j=startindex;j<endindex;j++)
//			cout<<" "<<G.Gmat[j];
//		cout<<endl;
		
	}


/*	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pmat[i];
	cout<<endl;
	for(i=0;i<G.Pnonzero;i++)
		cout<<" "<<G.Pcolumns[i];
	cout<<endl;	
	for(i=0;i<m+1;i++)
		cout<<" "<<G.ProwIndex[i];
	cout<<endl;
/*	for(i=0;i<m;i++)
		printf(" %.8f", G.b1[i]);
	cout<<endl;
	for(i=0;i<m;i++)
		printf(" %.8f", G.x1[i]);
	cout<<endl;	
	
	
	for(i=0;i<m;i++)
	{
		cout<<endl;
		int startindex=G.ProwIndex[i];
		int endindex=G.ProwIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Pmat[j];
		cout<<endl;
		
	}
	
/*	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.Mat_val[i];
	cout<<endl;
	for(i=0;i<G.nonzero;i++)
		cout<<" "<<G.columns[i];
	cout<<endl;	
	for(i=0;i<m+n+1;i++)
		cout<<" "<<G.rowIndex[i];
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.b[i]);
	cout<<endl;
	for(i=0;i<m+n;i++)
		printf(" %.8f", G.x[i]);
	cout<<endl;	
	
	
	for(i=0;i<m+n;i++)
	{
		cout<<endl;
		int startindex=G.rowIndex[i];
		int endindex=G.rowIndex[i+1];
		for(j=startindex;j<endindex;j++)
			cout<<" "<<G.Mat_val[j];
		cout<<endl;
		
	}
*/
/*	for (i=0;i<m+n+1;i++)
	{
		//cout<<endl;
		if(G.rowIndex[i]==G.rowIndex[i+1])
			break;
		
		for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			if(G.Mat_val[j]>10)
				cout<<G.Mat_val[j]<<"\t";
		}
		//cout<<endl;
		/*for(j=G.rowIndex[i];j<G.rowIndex[i+1];j++)
		{
			cout<<G.columns[j]<<"\t";
		}
		//cout<<endl;
	}
	cout<<endl;
*/

//printing the matrix
	printf("\n Fine till here");
	printf("\n");
//	int* rowmIndex=(int*)calloc(m+1,sizeof(int));
	printf("\n Fine till here");
	printf("\n");
	//int rowmIndex[5]={1,2,3,4,5};
/*	for(i=0;i<m+1;i++)
	{
		rowmIndex[i]=G.rowIndex[i];
		printf(" %d", rowmIndex[i]);
	}
*/
	printf("\n Allocating GPU memory\n");
	cudaDeviceReset();
	size_t free, total;
	cudaMemGetInfo(&free, &total);
	printf("\n Free Mem = %lf MB, Total mem = %lf MB\n", (double)(free)/(1024*1024), (double)(total)/(1024*1024));


	double *dev_csrValA, *dev_b, *dev_x;
	int *dev_csrRowIdxA, *dev_csrColA;

	double *dev_GcsrVal, *dev_b1, *dev_x1;
	double *dev_PcsrVal, *dev_b2, *dev_x2;
	
	int *dev_GcsrRowIdx, *dev_PcsrRowIdx, *dev_GcsrCol, *dev_PcsrCol;
	

	
	cudaMalloc((void**)&dev_PcsrVal, G.Pnonzero*sizeof(double));
	cudaMalloc((void**)&dev_PcsrRowIdx, (m+1)*sizeof(int));
	cudaMalloc((void**)&dev_PcsrCol, G.Pnonzero*sizeof(int));


	cudaMalloc((void**)&dev_b1, (m)*sizeof(double));
	cudaMalloc((void**)&dev_b2, n*sizeof(double));
	cudaMalloc((void**)&dev_x1, m*sizeof(double));
	cudaMalloc((void**)&dev_x2, n*sizeof(double));

	cudaMemcpy(dev_b1, G.b1, (m)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x1, G.x1, (m)*sizeof(double), cudaMemcpyHostToDevice);

	cudaMemcpy(dev_PcsrVal, G.Pmat, G.Pnonzero*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_b2, G.b2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_x2, G.x2, (n)*sizeof(double), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrRowIdx, G.ProwIndex, (m+1)*sizeof(int), cudaMemcpyHostToDevice);
	cudaMemcpy(dev_PcsrCol, G.Pcolumns, (G.Pnonzero)*sizeof(int), cudaMemcpyHostToDevice);


	/* Matrix has been created and stored in CSR format.
	However, CHOLMOD requires CSC format. Since our matrix is symmetric positive definite, we can simply swap 
	csrColA with csrRowIdx and vice versa
	*/

	/* Starting the CHOLMOD routine now*/
	printf("\n Initiating CHOLMOD\n");
	cholmod_sparse *A, *P;
	cholmod_dense *x, *b, *r, *midvec;
	cholmod_factor *L;
	cholmod_common *Common, cm;
	Common = &cm;
	cholmod_l_start(Common);
//	&Common->useGPU=1;
	printf("\n m = %d, G.Gnonzero = %d\n", m, G.Gnonzero);	


	
	cholmod_sparse *C = cholmod_l_allocate_sparse((size_t)(m), (size_t)(m), (size_t)(G.Gnonzero), 1, 0, 1, 1, Common); 
//	P = cholmod_l_allocate_sparse((size_t)(m), (size_t)(n), (size_t)(G.Pnonzero), 1, 0, 0, 1, Common); 
//	printf("\n Allocated \n");

	C->itype = CHOLMOD_LONG;	
//	printf("\n Itype \n");
	C->p = &G.GrowIndex[0];
//	printf("\n Columns \n");
	C->nz = &Gnz[0];
//	printf("\n Rows \n");
	C->i = &G.Gcolumns[0];
	C->dtype = 0;
	C->x = &G.Gmat[0];
	 
/*	P->itype = CHOLMOD_LONG;
	P->p = &G.ProwIndex[0];
	P->nz = &Pnz[0];
	P->i = &G.Pcolumns[0];
	P->dtype = 0;
	P->x = &G.Pmat[0]; 
*/	 

	b = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
	b->dtype=0;
	b->x = &G.b1[0];
	b->xtype = 1;

	printf("\n CHOLMOD manually set\n");
	cholmod_l_print_sparse(C, "A", Common);
	cholmod_l_print_dense(b, "b", Common);


	cudaEvent_t start, stop;
	cudaEventCreate(&start);
	cudaEventCreate(&stop);

	cudaEventRecord(start, 0);

	L = cholmod_l_analyze(C, Common);
	printf("\n Analysis: Flops: %g \t lnz: %g\n", Common->fl, Common->lnz);
	cholmod_l_factorize(C, L, Common);
	x = cholmod_l_solve(CHOLMOD_A, L, b, Common);
	
	cudaEventRecord(stop, 0);
	cudaEventSynchronize(stop);
	
	

	float elapsedTime;
	cudaEventElapsedTime(&elapsedTime, start, stop);
	printf("\n Time : %.6f secs :\n", elapsedTime);
	
	cholmod_l_print_dense(x, "X", Common);
	
	double *x1_mod = (double*)x->x;
	
	cudaMemcpy(dev_x1, x1_mod, m*sizeof(double), cudaMemcpyHostToDevice);
	
	cusparseStatus_t cuSparseStatus;
	cusparseHandle_t cuspHandle;
	cuSparseStatus = cusparseCreate(&cuspHandle);

	cusparseMatDescr_t descrP;
	cusparseCreateMatDescr(&descrP);

	cusparseSetMatType(descrP, CUSPARSE_MATRIX_TYPE_GENERAL);	
	cusparseSetMatIndexBase(descrP, CUSPARSE_INDEX_BASE_ZERO);
	
	
	double *dev_res1, *dev_simple;
	double *res1 = (double*)calloc(n,sizeof(double));
	cudaMalloc((void**)&dev_res1, n*sizeof(double));
	cudaMalloc((void**)&dev_simple, n*sizeof(double));
	
	const double alpha = 1.0, beta=0.0;
	//alpha = 1.0;
	//beta = 0.0;
	
	//solving P^T * G^-1 * b1 Result stored in dev_res1
	
	cuSparseStatus = cusparseDcsrmv(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_x1, &beta, dev_res1);
		
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
/*		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
		for(i=0;i<n;i++)
		{
			printf("\nres1[%d] = %.8f", i, res1[i]);
		}
		printf("\n P^T * G^-1 * b1 done! Vector stored in res1");
*/	}
	else
	{
		printf("\n P^T * G^-1 * b1 failed\n");
		exit(1);
	}
	
	const double alphaneg = -1.0;
		
		//Solving P^T * G^-1 * b1 - b2 ; Result stored in dev_res1
	
		
	cublasStatus_t cuBlasStatus;
	cublasHandle_t cubHandle;
	cuBlasStatus = cublasCreate(&cubHandle);
		
	cuBlasStatus = cublasDaxpy(cubHandle, n, &alphaneg, dev_b2, 1, dev_res1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
//		cudaMemcpy(res1, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
//		for(i=0;i<n;i++)
//		{
//			printf("\nres1[%d] = %.8f", i, res1[i]);
//		}
		printf("\n res1 = res1 - b2 done\n");
		 
	}
	else
	{
		printf("\n res1 = res1 - b2 failed\n");
	}
		
	
	
	
	///NOW COMPUTING G^-1 * P
	
	
	int k = 0;
	int breakloop=0;
	
	double **midMat = (double**)malloc(m*sizeof(double*));
	for(i=0;i<m;i++)
	{
		midMat[i] = (double*)calloc(n,sizeof(double));
	}
	
	cudaEventRecord(start, 0);
	
	for(i=0;i<n;i++)
	{
		breakloop = 0;
		double *vect = (double*)calloc(m,sizeof(double*));

		for(j=0;j<m;j++)
		{
			int startin = G.ProwIndex[j];
			int endin = G.ProwIndex[j+1];
			if(startin == endin)
				continue;
				
			k = startin;

			while(k<endin)
			{

				if(G.Pcolumns[k] == i)
				{	
					vect[j] = G.Pmat[k];
					breakloop=1;
					break;
				
				}
				k++;
			}
			if(breakloop == 1)
			{
				break;
			}
		}
		
		midvec = cholmod_l_allocate_dense((size_t)(m), 1, (size_t)(m), 1, Common);
		midvec->dtype=0;
		midvec->x=&vect[0];
		midvec->xtype = 1;
		
		cholmod_dense *res2;
		
		res2 = cholmod_l_solve(CHOLMOD_A, L, midvec, Common);

		
		double *re = (double*)res2->x;
		
//		printf("\n vector %d is:\n", i);
		int i1, j1, k1;
//		for(j1=0;j1<m;j1++)
//		{
//			midmat2flat[i+j1*n] = re[j1];
//			printf(" %lf", re[j1]);
//		}
//		printf("\n");
		for(i1=0;i1<m;i1++)
		{
			midMat[i1][i] = re[i1];
		}
		
		cholmod_l_free_dense(&midvec, Common);
			
	}
	
/*	printf("\n Midmat = \n");
	for(i=0;i<m;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf", midMat[i][j]);
		}
		printf("\n");
	} 
*/
	double *midMatflat = (double*)calloc((m*n),sizeof(double));
	double *dev_midMat;
	double *dev_solut;
	int counter = 0;
	for(i=0;i<n;i++)
	{
		for(j=0;j<m;j++)
		{
			midMatflat[counter] = midMat[j][i];
			counter++; 
		}
	}
	
	cudaMalloc((void**)&dev_midMat, m*n*sizeof(double));
	cudaMalloc((void**)&dev_solut, n*n*sizeof(double));
	
	cudaMemcpy(dev_midMat, midMatflat, m*n*sizeof(double), cudaMemcpyHostToDevice);
		
	//Solving P^T * midMat; Result stored in dev_solut
		
	cuSparseStatus = cusparseDcsrmm(cuspHandle, CUSPARSE_OPERATION_TRANSPOSE, m, n, n, G.Pnonzero, &alpha, descrP, dev_PcsrVal, dev_PcsrRowIdx, dev_PcsrCol, dev_midMat, m, &beta, dev_solut, n);
	
	if(cuSparseStatus == CUSPARSE_STATUS_SUCCESS)
	{
		printf("\n Solved P^T * G^-1 * P. Result stored in solut\n");
		
	}  
	else
	{
		printf("\n  Failed to Solve P^T * G^-1 * P \n");
		exit(1);
	}

/*	double *matGflat = (double*)calloc(n*n,sizeof(double));
	cudaMemcpy(matGflat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
	counter = 0;
	printf("\nBefore LU starts\n");
	for(i=0;i<n;i++)
	{
		for(j=0;j<n;j++)
		{
			printf(" %lf ", matGflat[counter]);
			counter++;
		}
		printf("\n");
	}
	printf("\n");
*/	
	cusolverStatus_t cuSolverStatus;
	
	
	cusolverDnHandle_t cudenHandle;
	cuSolverStatus = cusolverDnCreate(&cudenHandle);
	int Lwork = 0;
	cuSolverStatus = cusolverDnDgetrf_bufferSize(cudenHandle, n, n, dev_solut, n, &Lwork);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Buffer works\n Lwork = %d\n", Lwork);
	}
	else
	{
		exit(1);	
	}
	double *dev_Workspace;
	int *dev_Ipiv, *dev_Info;
	
	cudaMalloc((void**)&dev_Workspace, Lwork*sizeof(double));
	cudaMalloc((void**)&dev_Ipiv, n*sizeof(int));
	cudaMalloc((void**)&dev_Info, sizeof(int));
	
	//Calculating LU for dev_solut
//	double *nnmat = (double*)calloc(n*n,sizeof(double));
//	cudaMemcpy(nnmat, dev_solut, n*n*sizeof(double), cudaMemcpyDeviceToHost);
//	cuSolverStatus = cusolverDnDgetrfHost(cudenHandle, n, n, 

	cuSolverStatus = cusolverDnDgetrf(cudenHandle, n, n, dev_solut, n, dev_Workspace, dev_Ipiv, dev_Info);
	
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n solut has be defactorized into L and U. dev_Ipiv * solut = L * U\n");
	}
	else
	{
	
		printf("\n Unable to defactorize solut into LU\n");
		exit(1);	
	}
	
	//solving dev_solut * x = dev_res1. Result stored in dev_res1
	
	cuSolverStatus = cusolverDnDgetrs(cudenHandle, CUBLAS_OP_N, n, 1, dev_solut, n, dev_Ipiv, dev_res1, n, dev_Info);
	if(cuSolverStatus == CUSOLVER_STATUS_SUCCESS)
	{
		printf("\n Solution obtained for x2 \n");
	}
	else
	{
		printf("\n LU decomposition obtained by LU solver failed\n");
	}

/*	cudaMemcpy(G.x2, dev_res1, n*sizeof(double), cudaMemcpyDeviceToHost);
	printf("\n x2 = \n");
	for(i=0;i<n;i++)
	{
		printf("\n x2[%d] = %lf", i, G.x2[i]); 
	}
*/	
	
	double *dev_dummy;
	cudaMalloc((void**)&dev_dummy, m*sizeof(double));
	cudaMemset(dev_dummy, 0.0, m*sizeof(double));
	
	printf("\n Starting solving for x1 \n");
	//Solving for x1
	
		//Solving G^-1 * P * x2;  G^-1 * P is stored in midMat
	cuBlasStatus = cublasDgemv(cubHandle, CUBLAS_OP_N, m, n, &alpha, dev_midMat, m, dev_res1, 1, &beta, dev_dummy, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		double *toprint = (double*)calloc(m,sizeof(double));
		cudaMemcpy(toprint, dev_dummy, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n Intermediate vector :\n");
		for(i=0;i<m;i++)
		{
			printf("\ndummy[%d] = %lf", i, toprint[i]);
		}
*/		printf("\n midmat * x2 obtained. Stored in dummy\n");
	}
	else
	{
		printf("\n Failed to obtain midmat * x2\n");
	}
	
	cuBlasStatus = cublasDaxpy(cubHandle, m, &alphaneg, dev_dummy, 1, dev_x1, 1);
	if(cuBlasStatus == CUBLAS_STATUS_SUCCESS)
	{
/*		cudaMemcpy(G.x1, dev_x1, m*sizeof(double), cudaMemcpyDeviceToHost);
		printf("\n x1 = \n");
		for(i=0;i<m;i++)
		{
			printf("\n x1[%d] = %.15f", i, G.x1[i]);
		}
*/		printf("\n x1 obtained");
	}
	else
	{
		printf("\n Failed to obtain x1");
	}

	printf("\n Solver finished its work\n");

/*		cudaEventRecord(stop, 0);
		cudaEventSynchronize(stop);

		cudaEventElapsedTime(&elapsedTime, start, stop);
		printf("\n Time: %.6f msecs :\n", elapsedTime);
*/








	
	
	
	
	

	cholmod_l_finish(Common);
	return 0;

}