Esempi in C++ (Cpp) per dgemm

Esempio n. 1

0

Mostra file

File: eig3d_primme.c Progetto: Jackyhaorockman/TT-Toolbox

/*
 * Block matrix-vector callback (original tag: "bfun3").
 *
 * For each of the *blockSize vectors stored back to back in x, runs three
 * dgemm products interleaved with two dtransp calls, using the file-level
 * operands g_Phi2, g_A and g_Phi1 and the scratch buffers g_res1 / g_res2,
 * and stores the result at the matching offset in y.
 *
 * Notes carried over from the original author:
 *   - sizes of res1, res2: max(rx1*m*ra2*ry2, rx1*ra1*n*ry2)
 *   - phi2(rx2,ra2,ry2), phi1(ry1,rx1,ra1)
 *
 * x, y      : input / output blocks, treated as arrays of double.
 * blockSize : number of vectors in the block.
 * primme    : not referenced here.
 */
void PrimmeMatvec(void *x, void *y, int *blockSize, primme_params *primme) {
   double *in  = (double *)x;
   double *out = (double *)y;
   char notrans = 'N';
   long rows, cols, inner;   /* m, n, k of the current dgemm */
   long vec;

   for (vec = 0; vec < *blockSize; vec++) {
      /* Step 1: g_res2 = x_vec * g_Phi2, then dtransp into g_res1. */
      rows  = g_rx1*g_n;
      cols  = g_ra2*g_rx2;
      inner = g_rx2;
      dgemm(&notrans, &notrans, &rows, &cols, &inner, &done,
            &in[vec*rows*inner], &rows, g_Phi2, &inner, &dzero, g_res2, &rows);
      dtransp(g_rx1, g_n*g_ra2*g_rx2, g_res2, g_res1);

      /* Step 2: g_res2 = g_A * g_res1, then dtransp into g_res1. */
      rows  = g_ra1*g_n;
      cols  = g_rx2*g_rx1;
      inner = g_n*g_ra2;
      dgemm(&notrans, &notrans, &rows, &cols, &inner, &done,
            g_A, &rows, g_res1, &inner, &dzero, g_res2, &rows);
      dtransp(g_ra1*g_n*g_rx2, g_rx1, g_res2, g_res1);

      /* Step 3: y_vec = g_Phi1 * g_res1. */
      rows  = g_rx1;
      cols  = g_n*g_rx2;
      inner = g_rx1*g_ra1;
      dgemm(&notrans, &notrans, &rows, &cols, &inner, &done,
            g_Phi1, &rows, g_res1, &inner, &dzero, &out[vec*rows*cols], &rows);
   }
}

Esempio n. 2

0

Mostra file

File: matmulbench.c Progetto: wavemoth/wavemoth

/*
 * Benchmarks y = A*x + y with dgemm for an m-by-k matrix A, a k-by-n matrix x
 * and an m-by-n matrix y.
 *
 * One untimed warm-up call is made, then dgemm is repeated until at least
 * CLOCKS_PER_SEC clock ticks have elapsed.
 *
 * Returns the average time per call in seconds (clock() based), or -1.0 if
 * the work buffers could not be allocated.
 */
double doit(int m, int k, int n) {
  clock_t t0, dt;
  int i, repeats;
  double *A, *x, *y;

  A = malloc((size_t)m * k * sizeof *A);
  x = malloc((size_t)k * n * sizeof *x);
  y = malloc((size_t)m * n * sizeof *y);
  if (A == NULL || x == NULL || y == NULL) {
    free(A);
    free(x);
    free(y);
    return -1.0;
  }

  /* A has m*k elements; the original filled m*n, overrunning A when n > k. */
  for (i = 0; i < m * k; ++i) {
    A[i] = i;
  }
  for (i = 0; i < k * n; ++i) {
    x[i] = i;
  }
  for (i = 0; i < m * n; ++i) {
    y[i] = i;
  }

  /* Warm-up call, not included in the timing. */
  dgemm('N', 'N', m, n, k, 1.0, A, m, x, k, 1.0, y, m);

  t0 = clock();
  dt = 0;
  repeats = 0;
  while (dt < CLOCKS_PER_SEC) {
    dgemm('N', 'N', m, n, k, 1.0, A, m, x, k, 1.0, y, m);
    dt = clock() - t0;
    ++repeats;
  }

  free(A);
  free(x);
  free(y);
  return ((double)dt / CLOCKS_PER_SEC) / repeats;
}

Esempio n. 3

0

Mostra file

File: mexHelperBLAS.c Progetto: hosna/m-files

/*
 * Computes C <- alpha*A*B + beta*C through dgemm with both operands
 * untransposed ("N", "N").
 *
 * Raises a MEX error (mexErrMsgIdAndTxt) when any argument is sparse, when
 * any argument is complex, or when the dimensions are not conformable
 * (cols(A) == rows(B), rows(A) == rows(C), cols(B) == cols(C)).
 *
 * Returns EXIT_SUCCESS.
 */
int mfiles_dgemm00(
                   double alpha,
                   const mxArray *A,
                   const mxArray *B,
                   double beta,
                   mxArray *C) {
    ptrdiff_t rowsA, inner, rowsB, colsB, rowsC, colsC;

    /* Type checks come first, in the same order as before. */
    if (mxIsSparse(A) || mxIsSparse(B) || mxIsSparse(C)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Sparse matrices are not supported.");
    }
    if (mxIsComplex(A) || mxIsComplex(B) || mxIsComplex(C)) {
        mexErrMsgIdAndTxt("mfiles:BadType",
                          "Complex matrices are not supported.");
    }

    rowsA = mxGetM(A);
    inner = mxGetN(A);
    rowsB = mxGetM(B);
    colsB = mxGetN(B);
    rowsC = mxGetM(C);
    colsC = mxGetN(C);

    if (!((inner == rowsB) && (rowsA == rowsC) && (colsB == colsC))) {
        mexErrMsgIdAndTxt("mfiles:BadDim",
                          "Dimensions of matrices do not match.");
    }

    /* m = rows(C), n = cols(C), k = cols(A); lda = ldc = rows(C), ldb = cols(A). */
    dgemm("N", "N", &rowsC, &colsC, &inner, &alpha,
          mxGetPr(A), &rowsC,
          mxGetPr(B), &inner,
          &beta,
          mxGetPr(C), &rowsC);
    return EXIT_SUCCESS;
}

Esempio n. 4

0

Mostra file

File: cpu.cpp Progetto: xflying777/OpenAcc

// Fixed-point iteration, described by the original author as
//     x(k+1) = M^(-1) * (b - D * x(k))
//
// Each step copies x into xk, calls dgemm(temp, D, xk, N), forms
// temp[j] = b[j] - Beta[j]*temp[j] element-wise, and calls
// fastpoisson(temp, x, N) to produce the next x.  The loop stops when the
// 2-norm of (x - xk) drops below tol, or after N*N steps.
//
// Beta, D, x, b : arrays of N*N doubles; x is both the start value and the result.
// N             : problem size (all work arrays hold N*N entries).
// tol           : convergence threshold on ||x(k+1) - x(k)||_2.
void fixpoint_iteration(double *Beta, double *D, double *x, double *b, int N, double tol)
{
	int i, j;

	double *xk, *temp, error;

	xk = (double *) malloc(N*N*sizeof(double));
	temp = (double *) malloc(N*N*sizeof(double));
	if (xk == NULL || temp == NULL)
	{
		// Nothing can be computed without the work buffers; x is left as passed in.
		printf("\n Allocation failed in fixpoint_iteration. \n");
		free(xk);
		free(temp);
		return;
	}

	for (i=0; i<N*N; i++)
	{
		// Keep the previous iterate for the convergence test below.
		for (j=0; j<N*N; j++)	xk[j] = x[j];

		dgemm(temp, D, xk, N);

		for (j=0; j<N*N; j++)	temp[j] = b[j] - Beta[j]*temp[j];

		fastpoisson(temp, x, N);

		// temp is reused to hold the step difference.
		for (j=0; j<N*N; j++)	temp[j] = x[j] - xk[j];
		error = cblas_dnrm2(N*N, temp, 1);

		if ( error < tol)
		{
			printf("\n Converges at %d step ! \n", i+1);
			break;
		}
	}

	// The original never released these buffers.
	free(xk);
	free(temp);
}

Esempio n. 5

0

Mostra file

File: mpi-mkldgemm.c Progetto: coolr-hpc/coolr-ubench

/*
 * Runs the timed dgemm workload on this rank when it is selected.
 *
 * A rank is selected when seq_rank is -1, or when seq_rank is non-negative
 * and equal to rank.  A selected rank refills prd.A, prd.B and prd.C, runs
 * N_INNER dgemm calls between two MPI_Wtime() samples, and records
 * prd.elapsedtime and prd.gflops.  Any other rank only sets prd.gflops to 0.
 */
static void dosomething(int rank, int seq_rank)
{
	int idx;
	double t_begin, t_end;
	int selected = (seq_rank == -1) ||
		       (seq_rank >= 0 && seq_rank == rank);

	if (!selected) {
		prd.gflops = 0.0;
		return;
	}

	/* Constant fill: A = 1, B = 2, C = 1. */
	for (idx = 0; idx < m_m*m_k; ++idx)
		prd.A[idx] = 1.0;
	for (idx = 0; idx < m_k*m_n; ++idx)
		prd.B[idx] = 2.0;
	for (idx = 0; idx < m_m*m_n; ++idx)
		prd.C[idx] = 1.0;

	t_begin = MPI_Wtime();
	for (idx = 0; idx < N_INNER; idx++) {
		dgemm(&transa, &transb, &m_m, &m_n, &m_k, &alpha,
		      prd.A, &m_m, prd.B, &m_k, &beta,
		      prd.C, &m_m);
	}
	t_end = MPI_Wtime();

	prd.elapsedtime = t_end-t_begin;
	prd.gflops = (flopsval * 1e-9)/prd.elapsedtime;
}

Esempio n. 6

0

Mostra file

File: matrix.cpp Progetto: ezahedin/DE_Self-Consistent

//=============================================================================//
// Multiplies op(t1) by an identity matrix through dgemm, with the first
// transpose flag starting with 'C' and the second with 'N', and returns the
// result as a new t1.rows x t1.cols matrix.
//
// NOTE(review): assumes matrix(r, c) allocates r*c doubles in array1d and
// sets .rows/.cols — confirm against the class definition.
matrix conjugate(const matrix& t1) {

    int rows=t1.rows;
    int cols=t1.cols;

    // Build the cols x cols identity.  The original zeroed rows*rows entries
    // and set `rows` diagonal entries, which overruns (rows > cols) or leaves
    // part of the buffer uninitialised (rows < cols) for non-square input.
    matrix eye(cols,cols);
    for (int i=0 ; i<cols*cols; i++) {
        eye.array1d[i]=0;
    }

    for (int j=0; j<cols; j++) {
        eye.array1d[cols*j+j]=1;
    }


    int m=rows,k=cols,n=eye.cols;
    int lda=k,ldc=n,ldb=n;
    matrix product(m,n);


    double alpha=1,beta=0; // C <- alpha*op(A)*op(B) + beta*C
    // Only the first character of each flag string is the transpose code.
    dgemm("CSA","NSA",&m,&n,&k,&alpha,t1.array1d,&lda,eye.array1d,&ldb,&beta,product.array1d,&ldc);

    return product;
}

Esempio n. 7

0

Mostra file

File: mxm.c Progetto: akva2/tma4280

/*
 * c = a * b using BLAS dgemm with both operands untransposed.
 *
 * With 'N','N' the BLAS contract requires lda >= m (rows of a) and
 * ldb >= k (cols of a, i.e. rows of b).  The original passed lda = cols(a)
 * and ldb = rows(a), which only satisfies both constraints when a is square.
 *
 * NOTE(review): assumes data[0] points at contiguous column-major storage,
 * as the leading dimension ldc = rows(a) already implied — confirm.
 */
void mxm_blas (Matrix a, Matrix b, Matrix c)
{
  char    trans = 'N';
  double alpha = 1.0, beta = 0.0;
  dgemm(&trans, &trans, &a->rows, &b->cols, &a->cols, &alpha,
        a->data[0], &a->rows, b->data[0], &a->cols, &beta, c->data[0], &a->rows);
}

Esempio n. 8

0

Mostra file

File: dgemm_entry.c Progetto: nschloe/sippp

/*
 * Benchmark entry point: times one dgemm call for a given problem size.
 *
 * mcb         : opaque handle, cast to fds* to reach feld1/feld2/feld3.
 * problemsize : 1-based step; the matrix order is
 *               bi_dgemm_start + (problemsize-1) * bi_dgemm_increment.
 * results     : results[0] receives the matrix order, results[1] receives
 *               operations per time unit, or INVALID_MEASUREMENT when the
 *               measured time is below 3*dTimerGranularity.
 *
 * Returns -1 when results is NULL, 0 otherwise.
 */
int bi_entry(void *mcb, int problemsize,double *results){
	double accumulate=1.0;          /* passed as dgemm's beta */
	double elapsed=0, t_start, t_stop;
	double op_count, order;
	char notrans='N';
	double *matA= ((fds*)mcb)->feld1;
	double *matB= ((fds*)mcb)->feld2;
	double *matC= ((fds*)mcb)->feld3;

	if(results == NULL)
		return -1;

	order = bi_dgemm_start + (problemsize-1) * bi_dgemm_increment;
	results[0] = order;
	op_count = (1.0*order)*(1.0*order)*(2.0*order-1.0);

	/* init matrices -> cache-friendly */
	init_data(mcb, order);

	/* ---- timed region ---- */
	t_start=bi_timer();
	dgemm(notrans, notrans, order, order, order, 1.0, matA, order, matB, order, accumulate, matC, order);
	t_stop=bi_timer();
	/* ---------------------- */

	elapsed=t_stop-t_start - dTimerOverhead;
	if (!(elapsed < 3*dTimerGranularity)) {
		results[1]=op_count/elapsed;
	}
	else {
		/* Too short to be distinguished from timer noise. */
		results[1]=INVALID_MEASUREMENT;
	}

	return 0;
}

Esempio n. 9

0

Mostra file

File: RcppGLM.cpp Progetto: frankongre/RcppGLM

// Dense matrix product Z = X * Y for two R numeric matrices.
//
// xs, ys : R objects viewed both as raw double buffers and as NumericMatrix.
// Stops with an error when ncol(xs) != nrow(ys).
// Returns an n x m NumericMatrix (n = nrow(xs), m = ncol(ys)).
//
// NOTE(review): the buffer Z is allocated with new[] and is not deleted in
// this function; whether wrap(Z, n, m) takes ownership is not visible here
// — confirm.
NumericMatrix DMM(SEXP xs, SEXP ys) {
    // Raw data pointers handed to the project's dgemm
    double * lhs = as<double*>(xs);
    double * rhs = as<double*>(ys);

    // Matrix views, used only for their dimensions
    NumericMatrix lhsView(xs);
    NumericMatrix rhsView(ys);
    int n = lhsView.nrow();
    int m = rhsView.ncol();

    if(lhsView.ncol() != rhsView.nrow()) {
        stop("invalid dimenstion of matrices");
    }
    int p = lhsView.ncol();

    // Zero-initialised result buffer of m * n doubles
    double * Z = new double[m * n]();

    // Z = lhs * rhs
    dgemm(lhs, rhs, Z, n, p, m);

    NumericMatrix result = wrap(Z, n, m);
    return result;
}

Esempio n. 10

0

Mostra file

File: test.c Progetto: diegonieto/matrix-multiply-lib

/*
 * Checks dgemm against naive_dgemm on a 3x3 by 3x2 product.
 *
 * left (m x p) and right (p x n) are filled with their own linear index;
 * both routines write an m x n result, and the results must match
 * element for element.
 */
void test3() {
    int i, j;
    const int m = 3;
    const int n = 2;
    const int p = 3;
    double *left = (double*)malloc(m*p*sizeof(double));
    double *right = (double*)malloc(p*n*sizeof(double));
    double *dest1 = (double*)malloc(m*n*sizeof(double));
    double *dest2 = (double*)malloc(m*n*sizeof(double));

    assert(left != NULL && right != NULL && dest1 != NULL && dest2 != NULL);

    for(i=0; i<m; i++)
        for(j=0; j<p; j++)
            left[i*p+j] = i*p+j;

    for(i=0; i<p; i++)
        for(j=0; j<n; j++)
            right[i*n+j] = i*n+j;

    dgemm(m,n,p,left,right, dest1);

    naive_dgemm(m,n,p,left,right, dest2);

    /* The original compared dest1 with itself, so the test could never fail. */
    for(i=0; i<m; i++)
        for(j=0; j<n; j++)
            assert(dest1[i*n+j] == dest2[i*n+j]);

    free(left);
    free(right);
    free(dest1);
    free(dest2);
}

Esempio n. 11

0

Mostra file

File: computeCsfmInfimumMex.c Progetto: Aleem21/sfm_toolbox

/*
 * Compute C = A*B', where A is m x p and B is n x p (so C is m x n).
 *
 * dgemm is called with 'N' for A and 'C' for B, alpha = 1 and beta = 0;
 * leading dimensions are m for A and C, and n for B.
 *
 * The function is declared as returning double but originally had no return
 * statement, which is undefined behaviour if a caller reads the value.  It
 * now always returns 0.0; the product itself is delivered through C.
 */
__inline double matrixMultiply(double *A, double *B, double *C, mwSignedIndex m, mwSignedIndex p, mwSignedIndex n) {
  double one = 1.0, zero = 0.0;
  char chn[] = "N";
  char chnb[] = "C";

  dgemm(chn, chnb, &m, &n, &p, &one, A, &m, B, &n, &zero, C, &m);
  return 0.0;
}

Esempio n. 12

0

Mostra file

File: common.c Progetto: georgekw/tma4280

/*
 * C(block) = alpha * A * B(block) + beta * C(block) via dgemm ('N','N').
 *
 * The B operand starts at B->data[b_ofs] and uses b_col columns; the result
 * is written starting at C->data[c_ofs].  Leading dimensions are A->rows for
 * A, A->cols for B and C->rows for C.
 */
void MxM2(Matrix C, const Matrix A, const Matrix B, int b_ofs, int b_col, 
          int c_ofs, double alpha, double beta)
{
  char op_a = 'N';
  char op_b = 'N';

  dgemm(&op_a, &op_b,
        &A->rows,               /* m */
        &b_col,                 /* n */
        &A->cols,               /* k */
        &alpha,
        A->data[0],     &A->rows,
        B->data[b_ofs], &A->cols,
        &beta,
        C->data[c_ofs], &C->rows);
}

Esempio n. 13

0

Mostra file

File: dynamics.cpp Progetto: CURG/graspit_bci

/*!
  Builds the 6x6 matrix \a transformMat that maps a wrench expressed in one
  coordinate system to the same wrench expressed in another.  \a T is the
  transform between the systems and \a p is the new torque origin, given in
  the new coordinate system.

  The result is assembled from three 3x3 blocks written with
  fillMatrixBlock (R twice, and the product crossMat*R once) plus one block
  that is set to zero explicitly.
*/
void buildForceTransform(transf &T,vec3 &p,double *transformMat)
{
  static int j,k;
  static double R[9];
  static double crossMat[9];
  static double Rcross[9];
  static vec3 radius;

  // R[3*row + col] = element(row, col) of the affine part of T.
  // (An alternative fill with the two indices swapped was left disabled by
  // the original author.)
  for (j=0;j<3;j++) {
    for (k=0;k<3;k++) {
      R[3*j+k] = T.affine().element(j,k);
    }
  }

  // Snap entries that are within MACHINE_ZERO of 0, 1 or -1.
  for (j=0;j<9;j++) {
    if (fabs(R[j]) < MACHINE_ZERO) {
      R[j] = 0.0;
    }
    else if (R[j] > 1.0 - MACHINE_ZERO) {
      R[j] = 1.0;
    }
    else if (R[j] < -1.0 + MACHINE_ZERO) {
      R[j] = -1.0;
    }
  }

  radius = T.translation() - p;

  // Skew-symmetric matrix built from radius.
  crossMat[0] = 0.0;
  crossMat[1] = radius.z();
  crossMat[2] = -radius.y();
  crossMat[3] = -radius.z();
  crossMat[4] = 0.0;
  crossMat[5] = radius.x();
  crossMat[6] = radius.y();
  crossMat[7] = -radius.x();
  crossMat[8] = 0.0;

  // The original GraspIt! code multiplied in the order R, crossMat:
  //   dgemm("N","N",3,3,3,1.0,R,3,crossMat,3,0.0,Rcross,3);
  // mtc: new version (crossMat, R), believed by its author to be the
  // correct one.
  dgemm("N","N",3,3,3,1.0,crossMat,3,R,3,0.0,Rcross,3);

  fillMatrixBlock(R,3,0,0,2,2,transformMat,6);
  for (j=3;j<6;j++) {
    for (k=0;k<3;k++) {
      transformMat[6*j+k] = 0.0;
    }
  }
  fillMatrixBlock(Rcross,3,3,0,5,2,transformMat,6);
  fillMatrixBlock(R,3,3,3,5,5,transformMat,6);
}

Esempio n. 14

0

Mostra file

File: matrix_fun.c Progetto: BigBossB/Cortexsys

// =============================
// multiply:   C = op(A) * op(B)
// =============================
// C        : output buffer.
// A, B     : input matrices with rA x cA and rB x cB entries.
// mod      : two characters selecting op() for A and B: "NN", "TN", "NT"
//            or "TT".  With USE_BLAS, a leading 'S' routes to dsymm
//            (side 'L', uplo 'U').
//
// Any other mode string leaves C untouched in both build variants.
void mulMatMat(double* C, double* A, double* B,
				   const int rA, const int cA, const int rB, const int cB, const char *mod) {
#ifndef USE_BLAS // naive C implementations

   if ( (mod[0] == 'N') && (mod[1] == 'N') )
      multAB(C, A, B,rA, cA, cB);
   else if ( (mod[0] == 'T') && (mod[1] == 'N') )
      multAtB(C, A, B, rA, cA, cB);
   else if ( (mod[0] == 'N') && (mod[1] == 'T') )
      multABt(C, A, B, rA, cA, rB);
   else if ( (mod[0] == 'T') && (mod[1] == 'T') )
      multAtBt(C, A, B, rA, cA, rB);

#else

   // rows(Op(A)), columns(Op(A)), columns(Op(B)), rows(C)
   ptrdiff_t ropA, copA, copB, rC;
   // can't pass consts to fortran
   ptrdiff_t rA0 = rA, rB0 = rB;

   char modA = mod[0], modB = mod[1];
   double one = 1.0, zero = 0.0;

   if (mod[0] != 'S'){
      if ( (mod[0] == 'N') && (mod[1] == 'N') ){
         ropA  = rA;
         copA  = cA;
         copB  = cB;
         rC    = rA;
      } else if ( (mod[0] == 'T') && (mod[1] == 'N') ){
         ropA  = cA;
         copA  = rA;
         copB  = cB;
         rC    = cA;
      } else if ( (mod[0] == 'N') && (mod[1] == 'T') ){
         ropA  = rA;
         copA  = cA;
         copB  = rB;
         rC    = rA;
      } else if ( (mod[0] == 'T') && (mod[1] == 'T') ){
         ropA  = cA;
         copA  = rA;
         copB  = rB;
         rC    = cA;
      } else {
         // Unrecognised mode: the dimensions above would be uninitialised,
         // so do nothing, matching the naive branch.
         return;
      }
      dgemm(&modA, &modB, &ropA, &copB, &copA, &one, A, &rA0, B, &rB0, &zero, C, &rC);
   } else {
      char side='L', uplo = 'U';
      ropA  = rA;
      copB  = cB;
      // The leading dimension of C was never set on this path, so dsymm
      // received an indeterminate LDC.  C is ropA x copB, hence LDC = rA.
      rC    = rA;
      dsymm(&side, &uplo, &ropA, &copB,        &one, A, &rA0, B, &rB0, &zero, C, &rC);
   }
#endif
}

Esempio n. 15

0

Mostra file

File: bigalgebra.cpp Progetto: cran/bigalgebra

/* Wrappers for miscellaneous BLAS and LAPACK routines. */

/*
 * R-callable front end for dgemm.
 *
 * All scalar arguments arrive as R objects; the dimensions and leading
 * dimensions are read as doubles and converted to INT.  A, B and C are
 * resolved to double pointers through make_double_ptr using the matching
 * *_isBM flag.
 *
 * When C_isBM is TRUE the product is written into C starting at element
 * C_offset and C itself is returned; otherwise a fresh MM x NN R matrix is
 * allocated and returned.
 *
 * Which BLAS entry point is used is chosen at compile time (CBLAS, REFBLAS,
 * or the default dgemm_).
 */
SEXP
dgemm_wrapper (SEXP TRANSA, SEXP TRANSB, SEXP M, SEXP N, SEXP K,
               SEXP ALPHA, SEXP A, SEXP LDA, SEXP B, SEXP LDB, SEXP BETA,
               SEXP C, SEXP LDC, SEXP A_isBM, SEXP B_isBM, SEXP C_isBM,
               SEXP C_offset)
{
  long c_shift = *(DOUBLE_DATA (C_offset));
  double *a_data = make_double_ptr (A, A_isBM);
  double *b_data = make_double_ptr (B, B_isBM);
  double *c_data;
  SEXP result;
  INT dim_m = (INT) * (DOUBLE_DATA (M));
  INT dim_n = (INT) * (DOUBLE_DATA (N));
  INT dim_k = (INT) * (DOUBLE_DATA (K));
  INT ld_a = (INT) * (DOUBLE_DATA (LDA));
  INT ld_b = (INT) * (DOUBLE_DATA (LDB));
  INT ld_c = (INT) * (DOUBLE_DATA (LDC));

  if(LOGICAL_VALUE(C_isBM) != (Rboolean) TRUE)
  {
/* Allocate an output R matrix and return results there
   XXX Add check for size of MM and NN XXX
 */
    PROTECT(result = allocMatrix(REALSXP, (int)dim_m, (int)dim_n));
    c_data = NUMERIC_DATA(result);
  } else {
/* Return results in a big matrix */
    c_data = make_double_ptr (C, C_isBM) + c_shift;
    PROTECT(result = C);
  }

  char *trans_a = (char *) CHARACTER_VALUE (TRANSA);
  char *trans_b = (char *) CHARACTER_VALUE (TRANSB);
  double *alpha_ptr = NUMERIC_DATA (ALPHA);
  double *beta_ptr = NUMERIC_DATA (BETA);

/* An example of an alternate C-blas interface (e.g., ACML) */
#ifdef CBLAS
  dgemm (*trans_a, *trans_b,
         dim_m, dim_n, dim_k, *alpha_ptr, a_data, ld_a, b_data,
         ld_b, *beta_ptr, c_data, ld_c);
#elif REFBLAS
/* Standard Fortran interface without underscoring */
  int8_dgemm (trans_a, trans_b,
         &dim_m, &dim_n, &dim_k, alpha_ptr, a_data, &ld_a, b_data,
         &ld_b, beta_ptr, c_data, &ld_c);
#else
/* Standard Fortran interface from R's blas */
  dgemm_ (trans_a, trans_b,
         &dim_m, &dim_n, &dim_k, alpha_ptr, a_data, &ld_a, b_data,
         &ld_b, beta_ptr, c_data, &ld_c);
#endif
  unprotect(1);
  return result;
}

Esempio n. 16

0

Mostra file

File: tutorial_region.c Progetto: geopm/geopm

/*
 * Tutorial workload: runs one square DGEMM whose order is derived from
 * big_o as (int) cbrt(4e9 * big_o).
 *
 * big_o     : work amount; 0.0 makes the call a no-op.
 * do_report : when non-zero, prints the matrix order before running.
 *
 * Returns 0 on success, or the posix_memalign error code if a buffer could
 * not be allocated.
 *
 * Fixes relative to the original:
 *   - random() / RAND_MAX was an integer division, so A and B were filled
 *     almost entirely with 0; the division is now done in double.
 *   - C was passed to dgemm uninitialised although beta is non-zero.
 *   - Buffers obtained before a later allocation failed were leaked.
 *   - The element count is computed in size_t instead of int.
 */
int tutorial_dgemm(double big_o, int do_report)
{
    int err = 0;
    if (big_o != 0.0) {
        int matrix_size = (int) pow(4e9 * big_o, 1.0/3.0);
        int pad_size = 64;      /* alignment in bytes, also used as row padding */
        size_t num_elem = (size_t)matrix_size * ((size_t)matrix_size + pad_size);
        size_t mem_size = sizeof(double) * num_elem;
        char transa = 'n';
        char transb = 'n';
        int M = matrix_size;
        int N = matrix_size;
        int K = matrix_size;
        /* Leading dimensions are padded by pad_size bytes worth of doubles. */
        int LDA = matrix_size + pad_size / sizeof(double);
        int LDB = matrix_size + pad_size / sizeof(double);
        int LDC = matrix_size + pad_size / sizeof(double);
        double alpha = 2.0;
        double beta = 3.0;
        double *A = NULL;
        double *B = NULL;
        double *C = NULL;

        err = posix_memalign((void **)&A, pad_size, mem_size);
        if (!err) {
            err = posix_memalign((void **)&B, pad_size, mem_size);
        }
        if (!err) {
            err = posix_memalign((void **)&C, pad_size, mem_size);
        }

        if (!err) {
            #pragma omp parallel for
            for (size_t i = 0; i < num_elem; ++i) {
                A[i] = (double)random() / RAND_MAX;
                B[i] = (double)random() / RAND_MAX;
                C[i] = 0.0;     /* dgemm reads C because beta != 0 */
            }

            if (do_report) {
                printf("Executing a %d x %d DGEMM\n", matrix_size, matrix_size);
                fflush(stdout);
            }

            dgemm(&transa, &transb, &M, &N, &K, &alpha,
                  A, &LDA, B, &LDB, &beta, C, &LDC);
        }

        /* Pointers start as NULL, so this is safe after a partial failure. */
        free(C);
        free(B);
        free(A);
    }
    return err;
}

Esempio n. 17

0

Mostra file

File: EIS_reg_vec.c Progetto: aborowska/MyMit

/*
 * Weighted least-squares fit with three regressors, solved through the
 * normal equations.
 *
 * For i = 0..S-1 the design row is
 *     [ w[i],  w[i]*theta[i],  -0.5*theta[i]*w[i]*theta[i] ]
 * and the response is w[i]*y[i].  X'X and X'Y are formed with dgemm and the
 * 3x3 system is solved with dgetrf / dgetrs.
 *
 * y, theta, w : input arrays of length S.
 * S           : number of observations.
 * beta        : output, receives the 2nd and 3rd solution components
 *               (the first component is not returned).
 *
 * If the work arrays cannot be allocated the function returns without
 * touching beta.  INFO from the LAPACK calls is not inspected.
 */
void EIS_reg(double *y, double *theta, double *w, mwSignedIndex S,
           double *beta)
{
    mwSignedIndex i;
    char *chN = "N", *chT = "T";
    double one = 1.0, zero = 0.0;
    double A[9], B[3], *X, *Y;
    mwSignedIndex K, N;
    mwSignedIndex IPIV[3], INFO;

    /* Variable size arrays */
    X = malloc((3*S)*sizeof(double));
    Y = malloc((S)*sizeof(double));
    if (X == NULL || Y == NULL)
    {
        free(X);
        free(Y);
        return;
    }

    K = 3;
    N = 1;

    /* create X and Y */
    for (i=0; i<S; i++)
    {
        X[i] = w[i];
        X[S+i] = w[i]*theta[i];
        X[2*S+i] = -0.5*theta[i]*X[S+i];
        Y[i] = w[i]*y[i];
    }

    /* A, B and IPIV are passed as element pointers; the original passed
       &A, &B and &IPIV, which are pointers to whole arrays and have an
       incompatible type. */
    dgemm(chT, chN, &K, &K, &S, &one, X, &S, X, &S, &zero, A, &K);      /* get X'*X*/
    dgemm(chT, chN, &K, &N, &S, &one, X, &S, Y, &S, &zero, B, &K);      /* get X'*Y*/
    dgetrf(&K, &K, A, &K, IPIV, &INFO);                                 /* get LU factorisation of A*/
    dgetrs(chN, &K, &N, A, &K, IPIV, B, &K, &INFO);                     /* get solution B*/

    beta[0] = B[1];
    beta[1] = B[2];

     /* Free allocated memory */
    free(X); free(Y);
}

Esempio n. 18

0

Mostra file

File: ooc_offload.async.cpp Progetto: ryancoleman/lotsofcoresbook1code

/*
 * Calls dgemm inside an offload region that targets mic:MYDEVICE.
 *
 * The argument list mirrors dgemm's: transpose flags, dimensions m/n/k,
 * scalars alpha/beta, and the three matrices with their leading dimensions.
 * The flags, dimensions, scalars and leading dimensions are listed in the
 * pragma's in(...) clauses with length(1).
 */
void offload_dgemm(const char *transa, const char *transb, const MKL_INT *m, const MKL_INT *n, const MKL_INT *k,
                   const double *alpha, const double *a, const MKL_INT *lda, const double *b, const MKL_INT *ldb,
                   const double *beta, double *c, const MKL_INT *ldc){
/*
 * Perform dgemm on the device. a, b and c already exist on the device.
 */
    /* The matrix pointers are converted to integers here and cast back to
     * double* inside the offload region.
     * NOTE(review): presumably so that only the address values are carried
     * into the region and no matrix data is transferred — confirm. */
    intptr_t aptr = (intptr_t)a;
    intptr_t bptr = (intptr_t)b;
    intptr_t cptr = (intptr_t)c;
    #pragma offload target(mic:MYDEVICE) in(transa,transb,m,n,k:length(1)) \
                                         in(alpha,lda,ldb,beta,ldc:length(1)) 
    {
        dgemm(transa,transb,m,n,k,alpha,(double*)aptr,lda,(double*)bptr,ldb,beta,(double*)cptr,ldc); 
    }
}

Esempio n. 19

0

Mostra file

File: matrix.cpp Progetto: ezahedin/DE_Self-Consistent

//=============================================================================//
// Matrix product t1 * t2 computed with dgemm; returns a new
// t1.rows x t2.cols matrix.
//
// Both flag strings start with 'N'; the remaining characters of each
// string are not significant to dgemm.  Leading dimensions are t1.rows for
// t1 and for the product, and t1.cols for t2.
//
// NOTE(review): an earlier comment here described transposing row-major
// operands with a leading "T" flag, which is not what is passed — confirm
// the storage order of array1d.
matrix operator*(const matrix& t1,const matrix& t2)
{
    int outRows = t1.rows;
    int inner   = t1.cols;
    int outCols = t2.cols;

    int ldLeft    = outRows;
    int ldRight   = inner;
    int ldProduct = outRows;

    // C <- scaleAB * A * B + scaleC * C
    double scaleAB = 1.00;
    double scaleC  = 0;

    matrix product(outRows,outCols);

    dgemm("NSA","NSA",
          &outRows,&outCols,&inner,
          &scaleAB,
          t1.array1d,&ldLeft,
          t2.array1d,&ldRight,
          &scaleC,
          product.array1d,&ldProduct);
    return product;
}

Esempio n. 20

0

Mostra file

File: dgemm.c Progetto: ursache/HPC-hacks

/*
 * Times NRUNS executions of the project's dgemm and reports MFLOP/s.
 *
 * Every run starts from a fresh copy of C in a 32-byte aligned scratch
 * buffer, so repeated runs do not accumulate.  Only the dgemm call (and the
 * optional PAPI start/stop) is inside the timed section.  After the last
 * run the scratch result is copied back into C.
 *
 * Returns 2*NRUNS*M*N*K / 1e6 divided by the accumulated time, or -1.0 if
 * the scratch buffer cannot be allocated (C is then left untouched).
 */
double
time_dgemm (const int M, const int N, const unsigned K,
                const double alpha, const double *A, const int lda,
                const double *B, const int ldb,
                const double beta, double *C, const unsigned ldc)
{
	double mflops, mflop_s;
	double secs = -1;

	int num_iterations = NRUNS;
	int i;

	/* Size of C in bytes, computed in size_t to avoid 32-bit wrap-around. */
	const size_t c_bytes = (size_t)N * ldc * sizeof(double);

	double* Ca = (double*) _mm_malloc(c_bytes, 32);

	double cpu_time = 0;

	if (Ca == NULL)
		return -1.0;

	for (i = 0; i < num_iterations; ++i)
	{
		memcpy(Ca, C, c_bytes);
		cpu_time -= mysecond();
#ifdef PAPI
		PAPI_START;
#endif
		dgemm (M, N, K, alpha, A, lda, B, ldb, beta, Ca, ldc);
#ifdef PAPI
		PAPI_STOP;
		PAPI_PRINT;
#endif
		cpu_time += mysecond();
	}

	mflops  = 2.0 * num_iterations*M*N*K/1.0e6;
	secs    = cpu_time;
	mflop_s = mflops/secs;


	memcpy(C, Ca, c_bytes);
#ifdef PAPI
	PAPI_FLUSH;
#endif
	_mm_free(Ca);
	return mflop_s;
}

Esempio n. 21

0

Mostra file

File: blashelper.hpp Progetto: BenJamesbabala/twostreamfusion

 // Forwards a GEMM request to dgemm.  Scalars and dimensions are passed by
 // address and the const qualifier is dropped from a and b to match the
 // callee's parameter types.  The context argument is not used.  Always
 // returns vl::vlSuccess.
 static vl::Error
 gemm(vl::Context& context,
      char op1, char op2,
      ptrdiff_t m, ptrdiff_t n, ptrdiff_t k,
      type alpha,
      type const * a, ptrdiff_t lda,
      type const * b, ptrdiff_t ldb,
      type beta,
      type * c, ptrdiff_t ldc)
 {
   type * lhs = const_cast<type*>(a) ;
   type * rhs = const_cast<type*>(b) ;

   dgemm(&op1, &op2, &m, &n, &k, &alpha, lhs, &lda, rhs, &ldb, &beta, c, &ldc) ;

   return vl::vlSuccess ;
 }

Esempio n. 22

0

Mostra file

File: CORE_library.c Progetto: FelixPascal/BCILAB

/* ------------------------------------------------------------------------
 * Function: pairdist_squared_rows
 *
 * Fills dists (M x N, leading dimension M) with squared distances between
 * the M vectors in vectsX and the N vectors in vectsY, each of dimension D.
 * normsqrX[m] and normsqrY[n] supply the squared norms.
 *
 * Uses  dist(x,y)^2 = x'x + y'y - 2 x'y : the norm sums are written first
 * and dgemm then adds -2 * vectsX * vectsY' on top.
 *
 * When vectsX and vectsY are the same pointer and M == N, the diagonal is
 * forced to exactly 0.
 * ------------------------------------------------------------------------ */
void pairdist_squared_rows(double *vectsX, double *normsqrX, double *vectsY, double *normsqrY,
                           double *dists, int M, int N, int D)
{
  int row, col;
  char op_plain = 'N', op_transposed = 'T';
  double scale_product = -2.0, keep_existing = 1.0;

  /* Pass 1: x'x + y'y, one output column at a time. */
  for (col = 0; col < N; col++) {
    double *column = dists + col*M;
    for (row = 0; row < M; row++) {
      column[row] = normsqrX[row] + normsqrY[col];
    }
  }

  /* Pass 2: accumulate -2 x'y. */
  dgemm(&op_plain, &op_transposed, &M, &N, &D, &scale_product,
        vectsX, &M, vectsY, &N, &keep_existing, dists, &M);

  /* Self-distance: the diagonal must be exactly zero. */
  if ((vectsX == vectsY) && (M == N)) {
    for (col = 0; col < N; col++) {
      dists[col + col*N] = 0;
    }
  }
}

Esempio n. 23

0

Mostra file

File: taucs_blas_nounderscore_test.c Progetto: Wackye/taucs

/*
 * Configuration probe: checks that BLAS can be linked and called through
 * the symbol name `dgemm` (no trailing underscore).
 *
 * A 1x1 product is computed with alpha = 7, A = 2, B = 3, beta = 11, C = 5,
 * which must give 7*2*3 + 11*5 = 97.  On success two lines defining
 * TAUCS_BLAS_NOUNDERSCORE are appended to the file named by argv[1].
 *
 * Exit status: 0 when the definition was written, 1 when dgemm returned an
 * unexpected value, -1 when the output file is missing from the command
 * line or cannot be opened.
 */
int main(int argc, char* argv[]) 
{ 
  double A = 2.0; 
  double B = 3.0; 
  double C = 5.0; 
  double alpha = 7.0;
  double beta  = 11.0;
  int n = 1; 
  int ld = 1; 
  FILE * file;

  void dgemm (char*,char*,int*,
	      int*,int*,
	      double*,
	      double*,int*,double*,int*,
	      double*,
	      double*,int*);
  
  dgemm ("N","N", &n,&n,&n, &alpha, &A,&ld, &B,&ld, &beta, &C,&ld);

  printf("\n\n");
  printf("Linking with dgemm succedded\n");
  printf("\n\n");

  if (C == 97.0) {
    /* argv[1] was previously used without checking that it exists. */
    if (argc < 2) {
      printf("Missing output file argument.\n");
      return -1;
    }
    file = fopen( argv[1], "a" );
    if ( file == NULL ) {
      printf("Problem opening file.\n");
      return -1;
    }
    fprintf(file, "/* Definition for BLAS functions */\n");
    fprintf(file, "#define TAUCS_BLAS_NOUNDERSCORE\n");
    fclose( file );
    return 0;
  } else {
    return 1;
  }
}

Esempio n. 24

0

Mostra file

File: utils.c Progetto: CMPUT659T4/cmput659_project

/*
 * CBLAS-style front end that forwards to the Fortran-style dgemm.
 *
 * Each transpose argument is mapped to 'N' when it equals 111 and to 'T'
 * for every other value.  The Order argument is not consulted.  All other
 * arguments are passed through by address.
 *
 * NOTE(review): 111 is presumably the value of CblasNoTrans in the enum in
 * scope — confirm against the header.
 */
void cblas_dgemm(enum CBLAS_ORDER Order,enum CBLAS_TRANSPOSE transA, enum CBLAS_TRANSPOSE transB,
                 mwSignedIndex M, mwSignedIndex N, mwSignedIndex K, double alpha, double *A, mwSignedIndex lda,
                 double *B, mwSignedIndex ldb, double beta, double *C, mwSignedIndex ldc)
{
	char opA = (transA==111) ? 'N' : 'T';
	char opB = (transB==111) ? 'N' : 'T';

	dgemm(&opA,&opB,&M,&N,&K,&alpha,A,&lda,B,&ldb,&beta,C,&ldc);
}

Esempio n. 25

0

Mostra file

File: SylvMatrix.cpp Progetto: AnushikS/dynare

void SylvMatrix::multLeft(int zero_cols, const GeneralMatrix& a, const GeneralMatrix& b)
{
	int off = a.numRows() - a.numCols();
	if (off < 0 || a.numRows() != rows || off != zero_cols ||
		rows != b.numRows() || cols != b.numCols()) {
		throw SYLV_MES_EXCEPTION("Wrong matrix dimensions for multLeft.");
	}
	// here we cannot call SylvMatrix::gemm since it would require
	// another copy of (usually big) b (we are not able to do inplace
	// submatrix of const GeneralMatrix)
	if (a.getLD() > 0 && ld > 0) {
		blas_int mm = a.numRows();
		blas_int nn = cols;
		blas_int kk = a.numCols();
		double alpha = 1.0;
		blas_int lda = a.getLD();
		blas_int ldb = ld;
		double beta = 0.0;
		blas_int ldc = ld;
		dgemm("N", "N", &mm, &nn, &kk, &alpha, a.getData().base(), &lda,
				   b.getData().base()+off, &ldb, &beta, data.base(), &ldc);
	}
}

Esempio n. 26

0

Mostra file

File: TempoNestLinearLikeFuncs.cpp Progetto: zhuww/tempo2

void LRedMarginLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1;
	double Fitparams[numfit];
	double *EFAC;
	double EQUAD;
	int pcount=0;
	
// 	printf("here1\n");
	for(int p=0;p<ndim;p++){
// 		printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]);
		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}
// 	printf("here1.5\n");
	for(int p=0;p < numfit; p++){
		Fitparams[p]=Cube[p];
		pcount++;
// 		printf("param: %i %g \n",p,Fitparams[p]);
	}

	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[1];
		EFAC[0]=1;
// 		
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[1];
		EFAC[0]=Cube[pcount];
		pcount++;
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->numFitEFAC];
		for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				

	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=0;
// 		printf("EQUAD: %g \n",EQUAD);
	}
	else{
		
		EQUAD=pow(10.0,2*Cube[pcount]);
		pcount++;
// 		printf("EQUAD: %g %g %g %i \n",EQUAD,EQUADPrior[0],EQUADPrior[1],((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps + ((MNStruct *)context)->numFitEFAC);
	}

  	

	double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];
	double *Diffvec=new double[((MNStruct *)context)->pulse->nobs];
	dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');
	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
	}


	int FitCoeff=2*(((MNStruct *)context)->numFitRedCoeff);
	double *powercoeff=new double[FitCoeff];


	double *Noise=new double[((MNStruct *)context)->pulse->nobs];
	double *GDiffvec=new double[((MNStruct *)context)->Gsize];

	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Noise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD;
	}

	double **NG = new double*[((MNStruct *)context)->pulse->nobs]; for (int k=0; k<((MNStruct *)context)->pulse->nobs; k++) NG[k] = new double[((MNStruct *)context)->Gsize];
	for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){
		for(int j=0;j<((MNStruct *)context)->Gsize; j++){
			NG[i][j]=((MNStruct *)context)->GMatrix[i][j]*Noise[i];

		}
	}

	double** GG = new double*[((MNStruct *)context)->Gsize]; for (int k=0; k<((MNStruct *)context)->Gsize; k++) GG[k] = new double[((MNStruct *)context)->Gsize];

	dgemm(((MNStruct *)context)->GMatrix, NG,GG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'T','N');

	
	double tdet=0;
	dpotrf(GG, ((MNStruct *)context)->Gsize, tdet);
	dpotri(GG,((MNStruct *)context)->Gsize);

	dgemm(((MNStruct *)context)->GMatrix, GG,NG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, ((MNStruct *)context)->Gsize, ((MNStruct *)context)->Gsize, 'N','N');

	double **GNG = new double*[((MNStruct *)context)->pulse->nobs]; for (int k=0; k<((MNStruct *)context)->pulse->nobs; k++) GNG[k] = new double[((MNStruct *)context)->pulse->nobs];	

	dgemm(NG, ((MNStruct *)context)->GMatrix, GNG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'N','T');


	double timelike=0;
	for(int o1=0; o1<((MNStruct *)context)->pulse->nobs; o1++){
		for(int o2=0;o2<((MNStruct *)context)->pulse->nobs; o2++){
			timelike=timelike+Diffvec[o1]*GNG[o1][o2]*Diffvec[o2];
		}
	}

	double *NFd = new double[FitCoeff];
	double **FMatrix=new double*[((MNStruct *)context)->pulse->nobs];
	for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){
		FMatrix[i]=new double[FitCoeff];
	}

	double **NF=new double*[((MNStruct *)context)->pulse->nobs];
	for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){
		NF[i]=new double[FitCoeff];
	}

	double **FNF=new double*[FitCoeff];
	for(int i=0;i<FitCoeff;i++){
		FNF[i]=new double[FitCoeff];
	}

	double start,end;
	int go=0;
	for (int i=0;i<((MNStruct *)context)->pulse->nobs;i++)
	  {
	    if (((MNStruct *)context)->pulse->obsn[i].deleted==0)
	      {
		if (go==0)
		  {
		    go = 1;
		    start = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		    end  = start;
		  }
		else
		  {
		    if (start > (double)((MNStruct *)context)->pulse->obsn[i].bat)
		      start = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		    if (end < (double)((MNStruct *)context)->pulse->obsn[i].bat)
		      end = (double)((MNStruct *)context)->pulse->obsn[i].bat;
		  }
	      }
	  }
// 	printf("Total time span = %.6f days = %.6f years\n",end-start,(end-start)/365.25);
	double maxtspan=end-start;


	double freqdet=0;
	for (int i=0; i<FitCoeff/2; i++){
		int pnum=pcount;
		double pc=Cube[pcount];
		
		powercoeff[i]=pow(10.0,pc)/(maxtspan*24*60*60);///(365.25*24*60*60)/4;
		powercoeff[i+FitCoeff/2]=powercoeff[i];
		freqdet=freqdet+2*log(powercoeff[i]);
		pcount++;
	}


	int coeffsize=FitCoeff/2;
	std::vector<double>freqs(FitCoeff/2);
	for(int i=0;i<FitCoeff/2;i++){
		freqs[i]=double(i+1)/maxtspan;
	}	
	

	for(int i=0;i<FitCoeff/2;i++){
		for(int k=0;k<((MNStruct *)context)->pulse->nobs;k++){
			double time=(double)((MNStruct *)context)->pulse->obsn[k].bat; //- (double)((MNStruct *)context)->pulse->param[param_pepoch].val[0] - maxtspan/2;
			FMatrix[k][i]=cos(2*M_PI*freqs[i]*time);
// 			printf("cos %i %i %g \n",i,k,time);
		}
	}

	for(int i=0;i<FitCoeff/2;i++){
		for(int k=0;k<((MNStruct *)context)->pulse->nobs;k++){
			double time=(double)((MNStruct *)context)->pulse->obsn[k].bat; //- (double)((MNStruct *)context)->pulse->param[param_pepoch].val[0] - maxtspan/2;
			FMatrix[k][i+FitCoeff/2]=sin(2*M_PI*freqs[i]*time);
// 			printf("sin %i %i %g \n",i+FitCoeff/2,k,time);
		}
	}


	dgemm(GNG, FMatrix , NF, ((MNStruct *)context)->pulse->nobs,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->pulse->nobs, FitCoeff, 'N', 'N');

	dgemm(FMatrix, NF , FNF, ((MNStruct *)context)->pulse->nobs, FitCoeff, ((MNStruct *)context)->pulse->nobs, FitCoeff, 'T', 'N');

	dgemv(NF,Diffvec,NFd,((MNStruct *)context)->pulse->nobs,FitCoeff,'T');

	double **PPFM=new double*[FitCoeff];
	for(int i=0;i<FitCoeff;i++){
		PPFM[i]=new double[FitCoeff];
		for(int j=0;j<FitCoeff;j++){
			PPFM[i][j]=0;
		}
	}


	for(int c1=0; c1<FitCoeff; c1++){
		PPFM[c1][c1]=1.0/powercoeff[c1];
	}



	for(int j=0;j<FitCoeff;j++){
		for(int k=0;k<FitCoeff;k++){
			
			PPFM[j][k]=PPFM[j][k]+FNF[j][k];
		}
	}

 	
	double jointdet=0;
	dpotrf(PPFM, FitCoeff, jointdet);
        dpotri(PPFM,FitCoeff);

	double freqlike=0;
	for(int i=0;i<FitCoeff;i++){
		for(int j=0;j<FitCoeff;j++){
// 			printf("%i %i %g %g\n",i,j,NFd[i],PPFM[i][j]);
			freqlike=freqlike+NFd[i]*PPFM[i][j]*NFd[j];
		}
	}
	
	lnew=-0.5*(tdet+jointdet+freqdet+timelike-freqlike);

	if(isnan(lnew) || isinf(lnew)){

		lnew=-pow(10.0,200);
// 		printf("red amp and alpha %g %g\n",redamp,redalpha);
// 		printf("Like: %g %g %g \n",lnew,Chisq,covdet);
		
	}


	delete[] EFAC;
	delete[] powercoeff;
	delete[] NFd;

	for (int j = 0; j < FitCoeff; j++){
		delete[]PPFM[j];
	}
	delete[]PPFM;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[]NF[j];
	}
	delete[]NF;

	for (int j = 0; j < FitCoeff; j++){
		delete[]FNF[j];
	}
	delete[]FNF;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[]FMatrix[j];
	}
	delete[]FMatrix;

	delete[] Noise;
	delete[] Diffvec;
	delete[] Fitvec;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[] NG[j];
	}
	delete[] NG;

	for (int j = 0; j < ((MNStruct *)context)->Gsize; j++){
		delete[]GG[j];
	}
	delete[] GG;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[] GNG[j];
	}
	delete[] GNG;

// 	if(isinf(lnew) || isinf(jointdet) || isinf(tdet) || isinf(freqdet) || isinf(timelike) || isinf(freqlike)){
// 	printf("Chisq: %g %g %g %g %g %g \n",lnew,jointdet,tdet,freqdet,timelike,freqlike);
// 	}

}

Esempio n. 27

0

Mostra file

File: TempoNestLinearLikeFuncs.cpp Progetto: zhuww/tempo2

void WhiteMarginLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1;
	double Fitparams[numfit];
	double *EFAC;
	double EQUAD;
	int pcount=0;
	
// 	printf("here1\n");
	for(int p=0;p<ndim;p++){
// 		printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]);
		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}
// 	printf("here1.5\n");
	for(int p=0;p < numfit; p++){
		Fitparams[p]=Cube[p];
		pcount++;
// 		printf("param: %i %g \n",p,Fitparams[p]);
	}

// 	printf("here3\n");
	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[1];
		EFAC[0]=1;
// 		
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[1];
		EFAC[0]=Cube[pcount];
		pcount++;
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->numFitEFAC];
		for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				
// 	printf("here4\n");
	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=0;
// 		printf("E: %g %g\n",EFAC[0],EQUAD);
	}
	else{
		
		EQUAD=pow(10.0,2*Cube[pcount]);
		pcount++;

	}

	double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];
	double *Diffvec=new double[((MNStruct *)context)->pulse->nobs];
	dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');
	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
	}


	double *Noise=new double[((MNStruct *)context)->pulse->nobs];
	double *GDiffvec=new double[((MNStruct *)context)->Gsize];

	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Noise[o]=pow(((((MNStruct *)context)->pulse->obsn[o].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o]],2) + EQUAD;
	}

	double **NG = new double*[((MNStruct *)context)->pulse->nobs]; for (int k=0; k<((MNStruct *)context)->pulse->nobs; k++) NG[k] = new double[((MNStruct *)context)->Gsize];
	for(int i=0;i<((MNStruct *)context)->pulse->nobs;i++){
		for(int j=0;j<((MNStruct *)context)->Gsize; j++){
			NG[i][j]=((MNStruct *)context)->GMatrix[i][j]*Noise[i];

		}
	}

	double** GG = new double*[((MNStruct *)context)->Gsize]; for (int k=0; k<((MNStruct *)context)->Gsize; k++) GG[k] = new double[((MNStruct *)context)->Gsize];

	dgemm(((MNStruct *)context)->GMatrix, NG,GG,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize,((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'T','N');


	dgemv(((MNStruct *)context)->GMatrix,Diffvec,GDiffvec,((MNStruct *)context)->pulse->nobs,((MNStruct *)context)->Gsize,'T');



	double detN=0;
	double *WorkGDiffvec = new double[((MNStruct *)context)->Gsize];
	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		WorkGDiffvec[o1]=GDiffvec[o1];
	}
	dpotrf(GG, ((MNStruct *)context)->Gsize, detN);
        dpotrs(GG, WorkGDiffvec, ((MNStruct *)context)->Gsize);



	double Chisq=0;
	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		Chisq += GDiffvec[o1]*WorkGDiffvec[o1];
	}


	if(isnan(detN) || isinf(detN) || isnan(Chisq) || isinf(Chisq)){

		lnew=-pow(10.0,200);
	
	}
	else{
		lnew = -0.5*(((MNStruct *)context)->pulse->nobs*log(2*M_PI) + detN + Chisq);	
	}

	//printf("lnew: %g %g %g \n", lnew, detN, Chisq);

	delete[] EFAC;
	delete[] Fitvec;
	delete[] Diffvec;
	delete[] GDiffvec;
	delete[] WorkGDiffvec;

	for (int j = 0; j < ((MNStruct *)context)->pulse->nobs; j++){
		delete[] NG[j];
	}
	delete[] NG;

	for (int j = 0; j < ((MNStruct *)context)->Gsize; j++){
		delete[]GG[j];
	}
	delete[] GG;


}

Esempio n. 28

0

Mostra file

File: TempoNestLinearLikeFuncs.cpp Progetto: zhuww/tempo2

void vHRedMarginLinearLogLike(double *Cube, int &ndim, int &npars, double &lnew, void *context)
{

	int numfit=((MNStruct *)context)->numFitTiming + ((MNStruct *)context)->numFitJumps+1;
	double Fitparams[numfit];
	double *EFAC;
	double EQUAD, redamp, redalpha;
	int pcount=0;

// 	printf("here1\n");
	for(int p=0;p<ndim;p++){
// 		printf("param %i %g %g\n",p,((MNStruct *)context)->Dpriors[p][0],((MNStruct *)context)->Dpriors[p][1]);
		Cube[p]=(((MNStruct *)context)->Dpriors[p][1]-((MNStruct *)context)->Dpriors[p][0])*Cube[p]+((MNStruct *)context)->Dpriors[p][0];
	}
// 	printf("here1.5\n");
	for(int p=0;p < numfit; p++){
		Fitparams[p]=Cube[p];
		pcount++;
// 		printf("param: %i %g \n",p,Fitparams[p]);
	}

	if(((MNStruct *)context)->numFitEFAC == 0){
		EFAC=new double[1];
		EFAC[0]=1;
// 		
	}
	else if(((MNStruct *)context)->numFitEFAC == 1){
		EFAC=new double[1];
		EFAC[0]=Cube[pcount];
		pcount++;
		
	}
	else if(((MNStruct *)context)->numFitEFAC > 1){
		EFAC=new double[((MNStruct *)context)->numFitEFAC];
		for(int p=0;p< ((MNStruct *)context)->numFitEFAC; p++){
			EFAC[p]=Cube[pcount];
			pcount++;
		}
	}				

	if(((MNStruct *)context)->numFitEQUAD == 0){
		EQUAD=0;
// 		printf("EQUAD: %g \n",EQUAD);
	}
	else{
		
		EQUAD=pow(10.0,2*Cube[pcount]);
		pcount++;
//		printf("E: %g %g \n",EQUAD,EFAC[0]);

	}


	redamp=Cube[pcount];
	pcount++;
	redalpha=Cube[pcount];
	pcount++;
  	

	double *Fitvec=new double[((MNStruct *)context)->pulse->nobs];
	double *Diffvec=new double[((MNStruct *)context)->pulse->nobs];

	dgemv(((MNStruct *)context)->DMatrix,Fitparams,Fitvec,((MNStruct *)context)->pulse->nobs,numfit,'N');

	for(int o=0;o<((MNStruct *)context)->pulse->nobs; o++){
		Diffvec[o]=((MNStruct *)context)->pulse->obsn[o].residual-Fitvec[o];
	}

	double *WorkGDiffvec=new double[((MNStruct *)context)->Gsize];
	double *GDiffvec=new double[((MNStruct *)context)->Gsize];



	double secday=24*60*60;
	double LongestPeriod=1.0/pow(10.0,-5);
	double flo=1.0/LongestPeriod;

	double modelalpha=redalpha;
	double gwamp=pow(10.0,redamp);
	double gwampsquared=gwamp*gwamp*(pow((365.25*secday),2)/(12*M_PI*M_PI))*(pow(365.25,(1-modelalpha)))/(pow(flo,(modelalpha-1)));

	double timdiff=0;

	double covconst=gsl_sf_gamma(1-modelalpha)*sin(0.5*M_PI*modelalpha);
// 	printf("constants: %g %g \n",gwampsquared,covconst);


	
	double **CovMatrix = new double*[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs;o1++)CovMatrix[o1]=new double[((MNStruct *)context)->pulse->nobs];

	for(int o1=0;o1<((MNStruct *)context)->pulse->nobs; o1++){

		for(int o2=0;o2<((MNStruct *)context)->pulse->nobs; o2++){
			timdiff=((MNStruct *)context)->pulse->obsn[o1].bat-((MNStruct *)context)->pulse->obsn[o2].bat;	
			double tau=2.0*M_PI*fabs(timdiff);
			double covsum=0;

			for(int k=0; k <=10; k++){
				covsum=covsum+pow(-1.0,k)*(pow(flo*tau,2*k))/(iter_factorial(2*k)*(2*k+1-modelalpha));

			}

			CovMatrix[o1][o2]=gwampsquared*(covconst*pow((flo*tau),(modelalpha-1)) - covsum);
// 			printf("%i %i %g %g %g\n",o1,o2,CovMatrix[o1][o2],fabs(timdiff),covsum);

			if(o1==o2){
				CovMatrix[o1][o2] += pow(((((MNStruct *)context)->pulse->obsn[o1].toaErr)*pow(10.0,-6))*EFAC[((MNStruct *)context)->sysFlags[o1]],2) + EQUAD;
			}

		}
	}

	double **CG = new double*[((MNStruct *)context)->pulse->nobs]; for(int o1=0;o1<((MNStruct *)context)->pulse->nobs;o1++)CG[o1]=new double[((MNStruct *)context)->Gsize];

	double **GCG= new double*[((MNStruct *)context)->Gsize]; for(int o1=0;o1<((MNStruct *)context)->Gsize;o1++)GCG[o1]=new double[((MNStruct *)context)->Gsize];


	dgemm(CovMatrix,((MNStruct *)context)->GMatrix, CG, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'N', 'N');

	dgemm(((MNStruct *)context)->GMatrix,CG, GCG, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, ((MNStruct *)context)->pulse->nobs, ((MNStruct *)context)->Gsize, 'T', 'N');

	dgemv(((MNStruct *)context)->GMatrix,Diffvec,GDiffvec,((MNStruct *)context)->pulse->nobs,((MNStruct *)context)->Gsize,'T');

	double covdet=0;
	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		WorkGDiffvec[o1]=GDiffvec[o1];
	}
	dpotrf(GCG, ((MNStruct *)context)->Gsize, covdet);
        dpotrs(GCG, WorkGDiffvec, ((MNStruct *)context)->Gsize);



	double Chisq=0;


	for(int o1=0;o1<((MNStruct *)context)->Gsize; o1++){
		Chisq += GDiffvec[o1]*WorkGDiffvec[o1];
	}

	if(isnan(covdet) || isinf(covdet) || isnan(Chisq) || isinf(Chisq)){

		lnew=-pow(10.0,200);
// 		printf("red amp and alpha %g %g\n",redamp,redalpha);

		
	}
	else{
		lnew = -0.5*(((MNStruct *)context)->Gsize*log(2*M_PI) + covdet + Chisq);	

	}
// 		printf("Like: %g %g %g \n",lnew,Chisq,covdet);

	delete[] EFAC;
	for(int o=0;o<((MNStruct *)context)->pulse->nobs;o++){delete[] CovMatrix[o];}
	delete[] CovMatrix;
	for(int o=0;o<((MNStruct *)context)->pulse->nobs;o++){delete[] CG[o];}
	delete[] CG;
	for(int o=0;o<((MNStruct *)context)->Gsize;o++){delete[] GCG[o];}
	delete[] GCG;

	delete[] GDiffvec;
	delete[] WorkGDiffvec;
	delete[] Diffvec;
	delete[] Fitvec;
	

}

Esempio n. 29

0

Mostra file

File: matrix_prod.c Progetto: gordon-cs/cps343-hoe

/*---------------------------------------------------------------------------
 *
 * Time a matrix product C = A * B carried out by the BLAS routine DGEMM.
 *
 * Input
 *   int argc        - length of argv[] array (not inspected here)
 *   char* argv[]    - argv[0], argv[1], argv[2] hold the row count of A,
 *                     the shared inner dimension, and the column count of B
 *   int verbosity   - > 0 prints the dimensions, > 1 also prints a checksum
 *
 * Output
 *   double          - elapsed time for the product computation
 */
double multiply_by_blas( int argc, char* argv[], int verbosity )
{
    double **a, **b, **c;
    double start, stop, sec, gflop_count;
    int rows, mids, cols;

    /* problem size comes straight from the argument vector */
    rows = atoi( argv[0] );
    mids = atoi( argv[1] );
    cols = atoi( argv[2] );
    gflop_count = 2.0 * rows * mids * cols / 1.0e9;

    if ( verbosity > 0 )
    {
        printf( "BLAS: rows = %d, mids = %d, columns = %d\n",
                rows, mids, cols );
    }

    /* set up the operands and the result matrix */
    a = (double**) allocateMatrix( rows, mids );
    b = (double**) allocateMatrix( mids, cols );
    c = (double**) allocateMatrix( rows, cols );
    initialize_matrices( a, b, c, rows, cols, mids, verbosity );

    /*
     * Fortran sees our row-major storage as the transposed matrices, so
     * C := alpha * A * B + beta * C is obtained by asking dgemm() for
     * C' := alpha * B' * A' + beta * C'.  Both transpose flags are 'N'
     * (no transpose beyond the implicit one), and B is passed before A
     * so that dgemm() receives B' and A' in that order.  The dimension
     * arguments are those of C' (cols x rows) followed by the shared
     * inner dimension mids.
     */
    start = wtime();
    dgemm( 'N', 'N', cols, rows, mids, 1.0, &b[0][0], cols, &a[0][0], mids, 
           0.0, &c[0][0], cols );
    stop = wtime();
    sec = stop - start;

    if ( verbosity > 1 )
    {
        printf( "checksum = %f\n", checksum( c, rows, cols ) );
    }

    printf( "BLAS:        %6.3f secs %6.3f gflops ( %5d x %5d x %5d )\n",
            sec, gflop_count / sec, rows, mids, cols );

    /* release the matrices */
    deallocateMatrix( a );
    deallocateMatrix( b );
    deallocateMatrix( c );

    return stop - start;
}

Esempio n. 30

0

Mostra file

File: dynamics.cpp Progetto: CURG/graspit_bci

int
iterateDynamics(std::vector<Robot *> robotVec,
		std::vector<DynamicBody *> bodyVec,
		DynamicParameters *dp)		
{
  double h = dp->timeStep;
  bool useContactEps = dp->useContactEps;
  static double Jcg_tmp[9],Jcg_B[9],Jcg_N[9],Jcg_N_inv[9],R_N_B[9];
  static double db0=0.0,tmp3[3];
  static mat3 Rot;
  static int info;
  World *myWorld;
  KinematicChain *chain;
  int numBodies = bodyVec.size(),errCode = 0;
  int numRobots = robotVec.size();
  int numJoints=0;
  int numDOF=0;
  int bn,cn,i,j;
  int Mrows,Dcols,Arows,Hrows,Hcols,Nurows,Nucols;
  int numDOFLimits=0;

  std::list<Contact *> contactList;
  std::list<Contact *> objContactList;
  std::list<Contact *>::iterator cp;

  //  unsigned long dmark = dmalloc_mark();

  double *ql = new double[7*numBodies];
  double *qnew = new double[7*numBodies];
  double *vl = new double[6*numBodies];
  double *vlnew = new double[6*numBodies];
  double *M = new double[(6*numBodies)*(6*numBodies)];
  double *M_i = new double[(6*numBodies)*(6*numBodies)];
  double *fext = new double[6*numBodies];

  // LCP matrix
  double *A;

  // LCP vectors
  double *g,*lambda;
  double *predLambda = NULL; //used for debugging the prediction of LCP basis

  // main matrices for contact constraints
  double *H;

  // main matrices for joint constraints
  double *Nu;

  // main vector for contact constraints
  double *k;
  
  // main vectors for joint constraints
  double *eps;

  // intermediate matrices for contact constraints
  double *HtM_i,*v1;

  // intermediate matrices for contact constraints
  double *v2;

  // intermediate matrices for case of both joint and contact constraints
  double *NutM_i,*NutM_iNu,*INVNutM_iNu,*INVNutM_iNuNut;
  double *INVNutM_iNuNutM_i,*INVNutM_iNuNutM_iH;

  // intermediate vectors for case of both joint and contact constraints
  double *NutM_ikminuseps,*INVNutM_iNuNutM_ikminuseps;

  double *currq,*currM;

  Mrows = 6*numBodies;

  myWorld = bodyVec[0]->getWorld();

  std::map<Body*, int> islandIndices;
  for (i=0;i<myWorld->getNumBodies();i++) {
	islandIndices.insert( std::pair<Body*, int>(myWorld->getBody(i), -1) );
  }
  for (i=0;i<numBodies;i++) {
	islandIndices[ bodyVec[i] ] = i;
  }

  // count the joints and DOF, and the joint coupling constraints
  int numCouplingConstraints = 0;
  for (i=0;i<numRobots;i++) {
    numDOF += robotVec[i]->getNumDOF();
    for (j=0;j<robotVec[i]->getNumChains();j++) {
      chain = robotVec[i]->getChain(j);
      numJoints += chain->getNumJoints();
    }
	for (j=0;j<robotVec[i]->getNumDOF();j++) {
	  numCouplingConstraints += robotVec[i]->getDOF(j)->getNumCouplingConstraints();
	  numDOFLimits += robotVec[i]->getDOF(j)->getNumLimitConstraints();
	}
  }

  DBGP("Dynamics time step: " << h);
  DBGP("numJoints: " << numJoints);

  // count the total number of joints and contacts
  int numContacts = 0;
  int numTotalFrictionEdges = 0;
  int numDynJointConstraints=0;
  for (bn=0;bn<numBodies;bn++) {
    //count joints
    if (bodyVec[bn]->getDynJoint()) {
	  int numCon = bodyVec[bn]->getDynJoint()->getNumConstraints();
	  numDynJointConstraints += numCon;
	  DBGP(bodyVec[bn]->getName().latin1() << ": " << numCon << " constraints");
    }
	//count contacts
    objContactList = bodyVec[bn]->getContacts();
    for (cp=objContactList.begin();cp!=objContactList.end();cp++) {
      // check if the mate of this contact is already in the contact list
      if (std::find(contactList.begin(),contactList.end(),(*cp)->getMate()) == contactList.end()) {
		numContacts++;
		numTotalFrictionEdges += (*cp)->numFrictionEdges;
		contactList.push_back(*cp);
      }
	}
  }

  DBGP("Num contacts: " << numContacts);
  DBGP("Num friction edges: " << numTotalFrictionEdges);
  DBGP("Num dynjoint: " << numDynJointConstraints);

  // zero out matrices
  dcopy(Mrows*Mrows,&db0,0,M,1);
  dcopy(Mrows*Mrows,&db0,0,M_i,1);
  dcopy(Mrows,&db0,0,fext,1);

  //allocate the joint constraint matrices
  if (numJoints) {
    Nurows = Mrows;
    Nucols = numDynJointConstraints + numCouplingConstraints;
    DBGP("Nucols: " << Nucols);

    Nu = new double[Nurows * Nucols];
    dcopy(Nurows*Nucols,&db0,0,Nu,1);

    eps = new double[Nucols];
    dcopy(Nucols,&db0,0,eps,1);
    Arows = Mrows+Nucols;
  }
    
  // allocate the LCP matrix
  if (numContacts || numDOFLimits) {
	Dcols = numTotalFrictionEdges;

    DBGP("numContacts " << numContacts);	
    DBGP("Dcols " << Dcols);
    DBGP("numDOFLimits " << numDOFLimits);

    Hrows = Mrows;
    Hcols = Dcols + 2*numContacts + numDOFLimits;
    H = new double[Hrows * Hcols];

    dcopy(Hrows*Hcols,&db0,0,H,1);

    v1 = new double[Hrows * Hcols];
    v2 = new double[Hrows];
    dcopy(Hrows*Hcols,&db0,0,v1,1);
    dcopy(Hrows,&db0,0,v2,1);

    k = new double[Mrows]; //holds mass*previous velocity and external impulses
    Arows = Hcols;
    lambda = new double[Arows];  // the LCP solution    
  } else {
    Dcols = 0;
  }

  // allocate the constraint matrix
  if (numJoints || numContacts) {    
    A = new double[Arows*Arows];
    g = new double[Arows];

    dcopy(Arows*Arows,&db0,0,A,1); 
    dcopy(Arows,&db0,0,g,1); 
  }

  // compute mass matrix and external forces
  for (bn=0;bn<numBodies;bn++) {
	memcpy(vl+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double));
	memcpy(vlnew+6*bn,bodyVec[bn]->getVelocity(),6*sizeof(double));

    memcpy(ql+7*bn,bodyVec[bn]->getPos(),7*sizeof(double));    
    memcpy(qnew+7*bn,bodyVec[bn]->getPos(),7*sizeof(double));

    currq = qnew + 7*bn;    
    Quaternion tmpQuat(currq[3],currq[4],currq[5],currq[6]);
    tmpQuat.ToRotationMatrix(Rot);   

    // The rotation matrix returned by ToRotationMatrix is expressed as
    // a graphics style rot matrix (new axes are in rows), the R_N_B matrix
    // is a robotics style rot matrix (new axes in columns)
    
    R_N_B[0] = Rot[0];  R_N_B[3] = Rot[1];  R_N_B[6] = Rot[2];
    R_N_B[1] = Rot[3];  R_N_B[4] = Rot[4];  R_N_B[7] = Rot[5];
    R_N_B[2] = Rot[6];  R_N_B[5] = Rot[7];  R_N_B[8] = Rot[8];

    // Jcg_N = R_N_B * Jcg_B * R_N_B'; 
    // where Jcg_B is inertia matrix in body coords
    //       Jcg_N is inertia matrix in world coords ?
    memcpy(Jcg_B,bodyVec[bn]->getInertia(),9*sizeof(double));
	//multiply by mass
	dscal(9, bodyVec[bn]->getMass(), Jcg_B, 1);
    dgemm("N","N",3,3,3,1.0,R_N_B,3,Jcg_B,3,0.0,Jcg_tmp,3);
    dgemm("N","T",3,3,3,1.0,Jcg_tmp,3,R_N_B,3,0.0,Jcg_N,3);

	if ((info = invertMatrix(3,Jcg_N,Jcg_N_inv))) {
      printf("In iterateDynamics, inertia matrix inversion failed (info is %d)\n",info);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[0], Jcg_B[1], Jcg_B[2]);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[3], Jcg_B[4], Jcg_B[5]);
	  fprintf(stderr,"%f %f %f\n",Jcg_B[6], Jcg_B[7], Jcg_B[8]);
	  fprintf(stderr,"Body is %s\n",bodyVec[bn]->getName().latin1());
	}
    
    currM = M+((6*bn)*Mrows + bn*6);  //point to the correct block of M
    
    currM[0]              = bodyVec[bn]->getMass();
    currM[6*numBodies+1]  = bodyVec[bn]->getMass();
    currM[12*numBodies+2] = bodyVec[bn]->getMass();
    fillMatrixBlock(Jcg_N,3,3,3,5,5,currM,Mrows);
  
    currM = M_i+((6*bn)*Mrows + bn*6);//point to correct block of M_i

    currM[0]         = 1.0/bodyVec[bn]->getMass();
    currM[Mrows+1]   = 1.0/bodyVec[bn]->getMass();
    currM[2*Mrows+2] = 1.0/bodyVec[bn]->getMass();
    fillMatrixBlock(Jcg_N_inv,3,3,3,5,5,currM,Mrows);

    // compute external wrench
    // fext = [ 0 0 -9810.0*mass -[ang_vel_N x (Jcg_N * ang_vel_N)] ]
	//based on this, it would appear that graspit force units are N*1.0e6
	fext[6*bn+2] = -9810.0 * bodyVec[bn]->getMass() * dp->gravityMultiplier;  // force of gravity
	// fext[6*bn+2] = 0;  // NO force of gravity

    dgemv("N",3,3,1.0,Jcg_N,3,&vl[6*bn+3],1,0.0,tmp3,1);  // inertial moments
    fext[6*bn+3] = - (vl[6*bn+4]*tmp3[2] - vl[6*bn+5]*tmp3[1]);
    fext[6*bn+4] = - (vl[6*bn+5]*tmp3[0] - vl[6*bn+3]*tmp3[2]);
    fext[6*bn+5] = - (vl[6*bn+3]*tmp3[1] - vl[6*bn+4]*tmp3[0]);

    double ForcesToBodyFrame[36];
    transf invBody = bodyVec[bn]->getTran().inverse();
    vec3 invBodyTransl = invBody.translation();
    buildForceTransform(invBody,invBodyTransl,ForcesToBodyFrame);
	DBGP("fext initial: ");
    DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) );

    // add any other wrenches that have accumulated on the body
    daxpy(6,1.0,bodyVec[bn]->getExtWrenchAcc(),1,&fext[6*bn],1);
	DBGP("fext with accumulated wrench: ");
    DBGST( disp_mat(stdout,&fext[6*bn],1,6,0) );

	if (numContacts||numDOFLimits) {
      // k = Mv_l + hfext
      currM = M+((6*bn)*Mrows + bn*6);  //point to the correct block of M
      dgemv("N",6,6,1.0,currM,Mrows,vl+6*bn,1,0.0,k+6*bn,1);
    }
  }

  if (numJoints) {
    int ncn = 0;
	int hcn = 0;
	for (i=0;i<numBodies;i++) {
	  if (bodyVec[i]->getDynJoint())
		bodyVec[i]->getDynJoint()-> buildConstraints(Nu,eps,numBodies,islandIndices,ncn);
	}
	for (i=0;i<numRobots;i++) {
      robotVec[i]->buildDOFLimitConstraints(islandIndices,numBodies,H,g,hcn);
      robotVec[i]->buildDOFCouplingConstraints(islandIndices,numBodies,Nu,eps,ncn);
	}
	for (i=0;i<Nucols;i++) {
	  eps[i] *= ERP/h;
	}
	for (i=0; i<hcn; i++) {
		g[i] *= ERP/h;
	}
  }

  // add contacts to the LCP
  if (!contactList.empty()) {
    DBGP("processing contacts");
    double Ftform_N_C[36];
    
    // A is square
    double *Wn = &H[numDOFLimits*Hrows];
    double *D  = &H[(numDOFLimits+numContacts)*Hrows];
    
    double *E =		&A[(numDOFLimits+numContacts+Dcols)*Arows + numDOFLimits+numContacts];
    double *negET = &A[(numDOFLimits+numContacts)*Arows + numDOFLimits+numContacts+Dcols]; 
    double *MU    = &A[numDOFLimits*Arows + numDOFLimits+numContacts+Dcols];
    double *contactEps = &g[numDOFLimits];

	int frictionEdgesCount = 0;
    for (cp=contactList.begin(),cn=0; cp!=contactList.end(); cp++,cn++){

      //DBGP("contact " << cn);
      transf cf  = (*cp)->getContactFrame() *  (*cp)->getBody1Tran();
      transf cf2 = (*cp)->getMate()->getContactFrame() * (*cp)->getBody2Tran();

      DBGP("CONTACT DISTANCE: " << (cf.translation() - cf2.translation()).len());
      if (useContactEps) {
            contactEps[cn] = MIN(0.0,-ERP/h *
      			(Contact::THRESHOLD/2.0 - (cf.translation() - cf2.translation()).len()));
	  }
      DBGP(" EPS: " << contactEps[cn]);
      vec3 normal(cf.affine().element(2,0), cf.affine().element(2,1), cf.affine().element(2,2));
        
      // find which body is this contact from
      for (bn=0;bn<numBodies;bn++)
	    if ((*cp)->getBody1() == bodyVec[bn]) break;
      if (bn<numBodies) {
		//????? this doesn't seem correct
       	vec3 radius = cf.translation() - ( bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN );

	    //	radius = radius / 1000.0;  // convert to meters

		vec3 RcrossN = radius * normal;
		DBGP("body1 normal: " << normal);
		DBGP("body1 radius: " << radius);

		Wn[cn*Hrows+6*bn]   = normal.x();
		Wn[cn*Hrows+6*bn+1] = normal.y();
		Wn[cn*Hrows+6*bn+2] = normal.z();
		Wn[cn*Hrows+6*bn+3] = RcrossN.x();
		Wn[cn*Hrows+6*bn+4] = RcrossN.y();
		Wn[cn*Hrows+6*bn+5] = RcrossN.z();
	
		vec3 bodyOrigin = bodyVec[bn]->getCoG() * (*cp)->getBody1Tran() - position::ORIGIN;
		buildForceTransform(cf,bodyOrigin,Ftform_N_C);

		/* dgemm("N","N", 6,Contact::numFrictionEdges,6, 1.0,Ftform_N_C,6, Contact::frictionEdges,6,
			    0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows); */
				
		dgemm("N","N",
				6,(*cp)->numFrictionEdges,6,  //m, n, k
				1.0,Ftform_N_C,6,			 //alfa, A, lda
				(*cp)->frictionEdges,6,		//B, ldb
			    0.0,&D[ frictionEdgesCount*Hrows+6*bn],Hrows);	//beta, C, ldc
	  }

      //find the other body
      for(bn=0;bn<numBodies;bn++)
		if ((*cp)->getBody2() == bodyVec[bn]) break;
      if (bn<numBodies) {

        //normal = vec3(cf2.affine().element(2,0), cf2.affine().element(2,1),cf2.affine().element(2,2));
		normal = -normal;

		//vec3 radius = cf2.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN);
		vec3 radius = cf.translation() - (bodyVec[bn]->getCoG() * (*cp)->getBody2Tran() - position::ORIGIN);
		vec3 RcrossN = radius * normal;
		DBGP("body2 normal: " << normal);
		DBGP("body2 radius: " << radius);

		Wn[cn*Hrows+6*bn]   = normal.x();
		Wn[cn*Hrows+6*bn+1] = normal.y();
		Wn[cn*Hrows+6*bn+2] = normal.z();
		Wn[cn*Hrows+6*bn+3] = RcrossN.x();
		Wn[cn*Hrows+6*bn+4] = RcrossN.y();
		Wn[cn*Hrows+6*bn+5] = RcrossN.z();
	
		vec3 bodyOrigin = bodyVec[bn]->getCoG()*(*cp)->getBody2Tran() - position::ORIGIN;
		buildForceTransform(cf,bodyOrigin,Ftform_N_C);
		//buildForceTransform(cf2,bodyOrigin,Ftform_N_C);

/*		dgemm("N","N",6,Contact::numFrictionEdges,6,-1.0,Ftform_N_C,6, Contact::frictionEdges,6,
			  0.0,&D[Contact::numFrictionEdges*cn*Hrows+6*bn],Hrows);*/
		//original graspit had a -1.0 here in front of Ftform_N_C
		dgemm("N","N",
				6,(*cp)->numFrictionEdges,6,
				-1.0,Ftform_N_C,6,
				(*cp)->frictionEdges,6,
				0.0,&D[ frictionEdgesCount*Hrows+6*bn ],Hrows);
      }

      //for (i=cn*Contact::numFrictionEdges; i<(cn+1)*Contact::numFrictionEdges; i++) {
	  for (i=frictionEdgesCount; i<frictionEdgesCount+(*cp)->numFrictionEdges; i++) {
		E[cn*Arows+i] = 1.0;
		negET[i*Arows+cn] = -1.0;
      }      
      MU[cn*Arows + cn] = (*cp)->getCof();
	  frictionEdgesCount += (*cp)->numFrictionEdges;
    }
  }
  
  if (numContacts || numDOFLimits)
    daxpy(Mrows,h,fext,1,k,1);

  if (numJoints && (numContacts || numDOFLimits)) {
    // Cnu1 = INV(Nu'M_iNu)Nu'M_iH
    // Cnu2 = INV(Nu'M_iNu)(Nu'M_ik-eps)
    // v1 = -NuCnu1
    // v2 = -NuCnu2
    
    NutM_i = new double[Nucols*Mrows];
    NutM_iNu = new double[Nucols*Nucols];
    INVNutM_iNu = new double[Nucols*Nucols];
    INVNutM_iNuNut = new double[Nucols*Nurows];
    INVNutM_iNuNutM_i = new double[Nucols*Mrows];
    INVNutM_iNuNutM_iH = new double[Nucols*Hcols];
    

    NutM_ikminuseps = new double[Nucols];
    INVNutM_iNuNutM_ikminuseps = new double[Nucols];
    
    dgemm("T","N",Nucols,Mrows,Mrows,1.0,Nu,Nurows,M_i,Mrows, 0.0,NutM_i,Nucols);
    dgemm("N","N",Nucols,Nucols,Mrows,1.0,NutM_i,Nucols,Nu,Nurows, 0.0,NutM_iNu,Nucols);
    if ((info = invertMatrix(Nucols,NutM_iNu,INVNutM_iNu)))
      printf("In iterateDynamics, NutM_iNu matrix inversion failed (info is %d)\n",info);
    
    dgemm("N","T",Nucols,Nurows,Nucols,1.0,INVNutM_iNu,Nucols,Nu,Nurows,
	  0.0,INVNutM_iNuNut,Nucols);
    dgemm("N","N",Nucols,Mrows,Mrows,1.0,INVNutM_iNuNut,Nucols,M_i,Mrows,
	  0.0,INVNutM_iNuNutM_i,Nucols);
    dgemm("N","N",Nucols,Hcols,Mrows,1.0,INVNutM_iNuNutM_i,Nucols,H,Hrows,
	  0.0,INVNutM_iNuNutM_iH,Nucols);
    dgemm("N","N",Nurows,Hcols,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_iH,Nucols,
	  0.0,v1,Nurows);

    dgemv("N",Nucols,Mrows,1.0,NutM_i,Nucols,k,1,0.0,NutM_ikminuseps,1);
    daxpy(Nucols,-1.0,eps,1,NutM_ikminuseps,1);

    dgemv("N",Nucols,Nucols,1.0,INVNutM_iNu,Nucols,NutM_ikminuseps,1,
	  0.0,INVNutM_iNuNutM_ikminuseps,1);

    dgemv("N",Nurows,Nucols,-1.0,Nu,Nurows,INVNutM_iNuNutM_ikminuseps,1,
	  0.0,v2,1);
  }

  if (numContacts || numDOFLimits) {
    // in the simple case without joint constraints
    // A = H'M_iv1 + N
    // g = H'M_iv2
    // where N is already stored in A
    // v1 is the first term of v_(l+1) and v2 is the second term
    // v_l+1 = M_i(v1 lambda + v2) = M_i(H lambda + k)
    // k is (Mv_l + hfext)

    //add H to v1
    //add k to v2
    DBGP("k:");
    DBGST( disp_mat(stdout,k,1,Mrows,0) );
    DBGP("first g:");
    DBGST( disp_mat(stdout,g,1,Arows,0) );

	daxpy(Mrows*Hcols,1.0,H,1,v1,1);
    daxpy(Mrows,1.0,k,1,v2,1);

    // build A and g
    HtM_i = new double[Hcols*Mrows];
    dgemm("T","N",Hcols,Mrows,Hrows,1.0,H,Hrows,M_i,Mrows,0.0,HtM_i,Hcols);

    dgemm("N","N",Hcols,Hcols,Mrows,1.0,HtM_i,Hcols,v1,Mrows,1.0,A,Arows);
    //    dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,0.0,g,1);
    dgemv("N",Hcols,Mrows,1.0,HtM_i,Hcols,v2,1,1.0,g,1);
  }

	int frictionEdgesCount;
	//debug information; can be removed

	if (numContacts || numDOFLimits) {
		bool lemkePredict = false;
		if (lemkePredict) {
			//try to use information from previous time steps to guess a good starting basis for Lemke's algorithm
			assembleLCPPrediction(lambda, Arows, numDOFLimits, &contactList);
	        predLambda = new double[Arows];  // keep a copy of the prediction so we can check it later
			dcopy(Arows, lambda, 1, predLambda, 1);
//			fprintf(stderr,"Prediction: \n");
//			printLCPBasis(predLambda, Arows, numDOFLimits, numContacts);
		}

	    //    double startTime;   
	    //    startTime = getTime();
   
		DBGP("g:");
		DBGST( for (i=0;i<Arows;i++) printf("%le ",g[i]); );