示例#1
0
/*
 *check M z + q =0
 *
 *
 */
double LinearSystem_computeError(LinearSystemProblem* problem, double *z)
{
  double * pM = problem->M->matrix0;
  double * pQ = problem->q;
  double error = 10;
  int n = problem->size;
  double * res = (double*)malloc(n * sizeof(double));
  memcpy(res, pQ, n * sizeof(double));
  cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, 1.0, pM, n, z, 1, 1.0, res, 1);
  error = cblas_dnrm2(n, res, 1);
  free(res);
  return error;

}
示例#2
0
///////////////Problem///////////////
Problem::Problem(long n,std::string structure="diagonal", std::string type = "nice") {

    m_A = Tools::initArray(n*n);
    m_x = Tools::initArray(n);
    m_b = Tools::initArray(n);
    m_n = n;

    double (*f_pntr)(long);

    if ( type.compare("nice") ) {

        f_pntr = randomNumber;

    } else if (type.compare("random")) {

        f_pntr = niceNumber;

    }

    if ( !structure.compare("diagonal") ) {

        for (long ii = 0; ii < m_n; ii++ ) {

            m_A[ii + m_n*ii] = f_pntr(ii);
            m_x[ii] = f_pntr(ii);

        }

    } else if (!structure.compare("dense")) {
    
        for( long ii = 0; ii < m_n*m_n; ii++) {

            m_A[ii] = f_pntr(ii); 

        }

        for( long ii = 0; ii < m_n; ii++) {

            m_x[ii] = f_pntr(ii); 

        }

    } 

    int inc = 1;
    cblas_dgemv(CblasColMajor,CblasNoTrans,m_n,m_n,1.0,m_A,m_n,m_x,inc,0.0,m_b, inc);


}
示例#3
0
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_dgemv
(JNIEnv * env, jclass clazz,
 jint Order, jint TransA,
 jint M, jint N,
 jdouble alpha,
 jobject A, jint lda,
 jobject X, jint incX,
 jdouble beta,
 jobject Y, jint incY) {

  double *cA = (double *) (*env)->GetDirectBufferAddress(env, A);
  double *cX = (double *) (*env)->GetDirectBufferAddress(env, X);
  double *cY = (double *) (*env)->GetDirectBufferAddress(env, Y);
  cblas_dgemv(Order, TransA, M, N, alpha, cA, lda, cX, incX, beta, cY, incY);
};
示例#4
0
ECLBLAS_CALL void dpotf2(bool & __isAllResult, size32_t & __lenResult,
                         void * & __result, uint8_t tri, uint32_t r,
                         bool isAllA, size32_t lenA, const void * A,
                         bool clear) {
  unsigned int cells = r*r;
  __isAllResult = false;
  __lenResult = cells * sizeof(double);
  double *new_a = (double*) rtlMalloc(__lenResult);
  memcpy(new_a, A, __lenResult);
  double ajj;
  // x and y refer to the embedded vectors for the multiply, not an axis
  unsigned int diag, a_pos, x_pos, y_pos;
  unsigned int col_step = r;  // between columns
  unsigned int row_step = 1;  // between rows
  unsigned int x_step = (tri==UPPER_TRIANGLE)  ? row_step  : col_step;
  unsigned int y_step = (tri==UPPER_TRIANGLE)  ? col_step  : row_step;
  for (unsigned int j=0; j<r; j++) {
    diag = (j * r) + j;    // diagonal
    x_pos = j * ((tri==UPPER_TRIANGLE) ? col_step  : row_step);
    a_pos = (j+1) * ((tri==UPPER_TRIANGLE) ? col_step  : row_step);
    y_pos = diag + y_step;
    // ddot.value <- x'*y
    ajj = new_a[diag] - cblas_ddot(j, (new_a+x_pos), x_step, (new_a+x_pos), x_step);
    //if ajj is 0, negative or NaN, then error
    if (ajj <= 0.0) {
      rtlFree(new_a);
      rtlFail(0, "Not a positive definite matrix");
    }
    ajj = sqrt(ajj);
    new_a[diag] = ajj;
    if ( j < r-1) {
      // y <- alpha*op(A)*x + beta*y
      cblas_dgemv(CblasColMajor,
                  (tri==UPPER_TRIANGLE)  ? CblasTrans  : CblasNoTrans,
                  (tri==UPPER_TRIANGLE)  ? j           : r-1-j,    // M
                  (tri==UPPER_TRIANGLE)  ? r-1-j       : j,        // N
                   -1.0,                          // alpha
                   (new_a+a_pos), r,              //A
                   (new_a+x_pos), x_step,         //X
                   1.0, (new_a+y_pos), y_step);   // beta and Y
      // x <- alpha * x
      cblas_dscal(r-1-j, 1.0/ajj, (new_a+y_pos), y_step);
    }
    // clear lower or upper part if clear flag set
    for(unsigned int k=1; clear && k<r-j; k++) new_a[(k*x_step)+diag] = 0.0;
  }
  __result = (void*) new_a;
}
示例#5
0
void method5(int M,int N,int T,double *Xin,double *Xout,double *Kern) {

    omp_set_num_threads(10);
#pragma omp parallel
  {
    int tid = omp_get_thread_num();
    int nthreads=omp_get_num_threads();

    if (tid==0) printf("Using %d threads\n",omp_get_num_threads());

    int i1=((N-T)*tid)/nthreads;
    int i2=((N-T)*(tid+1))/nthreads;
    for (int n=i1; n<i2; n++) {
        cblas_dgemv(CblasColMajor, CblasNoTrans, M, T, 1.0, &Xin[M*n], M, Kern, 1, 0.0, &Xout[M*n], 1);
    }
  }
}
示例#6
0
void VectorView::set_to_product(const MatrixView& m, const VectorView& v,
                                const bool transpose)
{
  CBLAS_TRANSPOSE tr;
  if (transpose){
    tr = CblasTrans;
    assert(m.cols() == length());
    assert(m.rows() == v.length());
  } else {
    tr = CblasNoTrans;
    assert(m.cols() == v.length());
    assert(m.rows() == length());
  }

  cblas_dgemv(CblasColMajor, tr, m.rows(), m.cols(), 1.0, m.data(),
              m.stride(), v.data(), 1, 0.0, data_, 1);
}
示例#7
0
文件: mkl.c 项目: joyhuang9473/BigDL
    /*
     * Class:     com_intel_analytics_bigdl_mkl_MKL
     * Method:    vdgemv
     * Signature: (SSIIID[DII[DIID[DII)V
     */
JNIEXPORT void JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_vdgemv
   (JNIEnv * env, jclass cls, jchar trans, jint m, jint n,
   jdouble alpha, jdoubleArray a, jint aOffset, jint lda, jdoubleArray x,
   jint xOffset, jint incx, jdouble beta, jdoubleArray y, jint yOffset, jint incy) {

   jdouble * jni_a = (*env)->GetPrimitiveArrayCritical(env, a, JNI_FALSE);
   jdouble * jni_x = (*env)->GetPrimitiveArrayCritical(env, x, JNI_FALSE);
   jdouble * jni_y = (*env)->GetPrimitiveArrayCritical(env, y, JNI_FALSE);

   int jni_trans;
   if(trans == 't' || trans == 'T') jni_trans = CblasTrans; else jni_trans = CblasNoTrans;
   cblas_dgemv(CblasColMajor, jni_trans, m, n, alpha, jni_a + aOffset, lda, jni_x + xOffset, incx,
      beta, jni_y + yOffset, incy);

   (*env)->ReleasePrimitiveArrayCritical(env, a, jni_a, 0);
   (*env)->ReleasePrimitiveArrayCritical(env, x, jni_x, 0);
   (*env)->ReleasePrimitiveArrayCritical(env, y, jni_y, 0);
}
double * solve(size_t nfield, double * perm, double h, double lb, double ub)
{
    double * op = buildOP(nfield,perm,h);
    double * rhs = buildRHS(nfield-1,lb+h,ub);
    
    double * inv = calloc_double((nfield-1) * (nfield-1));
    pinv(nfield-1,nfield-1,nfield-1,op,inv,0.0);
    
    double * sol = calloc_double(nfield);
    cblas_dgemv(CblasColMajor,CblasNoTrans,nfield-1,nfield-1,1.0,inv,nfield-1,
                rhs,1,0.0,sol+1,1);

    free(op); op = NULL;
    free(rhs); rhs = NULL;
    free(inv); inv = NULL;

    return sol;
}
示例#9
0
文件: test2.c 项目: yshl/test-myblas
void test_dgemv_trans2()
{
    const size_t m=35, n=45;
    double a[m*n];
    double x[m];
    double y[n];
    double z[n];
    size_t i;
    for(i=0; i<m*n; i++) a[i]=i;
    for(i=0; i<n; i++) x[i]=i+m*n;
    for(i=0; i<m; i++) y[i]=z[i]=i*i;

    my_dgemv(CblasRowMajor,CblasTrans,m,n,2.0,a,n,x,1,2.0,y,1);
    cblas_dgemv(CblasRowMajor,CblasTrans,m,n,2.0,a,n,x,1,2.0,z,1);
    for(i=0; i<m; i++){
	assert(y[i]==z[i]);
    }
}
示例#10
0
int
gsl_blas_dgemv(CBLAS_TRANSPOSE_t TransA, double alpha,
	       const gsl_matrix * A, const gsl_vector * X, double beta,
	       gsl_vector * Y)
{
    const size_t M = A->size1;
    const size_t N = A->size2;

    if ((TransA == CblasNoTrans && N == X->size && M == Y->size)
	|| (TransA == CblasTrans && M == X->size && N == Y->size)) {
	cblas_dgemv(CblasRowMajor, TransA, INT(M), INT(N), alpha, A->data,
		    INT(A->tda), X->data, INT(X->stride), beta, Y->data,
		    INT(Y->stride));
	return GSL_SUCCESS;
    } else {
	GSL_ERROR("invalid length", GSL_EBADLEN);
    }
}
示例#11
0
文件: test2.c 项目: yshl/test-myblas
void test_dgemv()
{
    const size_t m=3, n=4;
    double a[3*4]={
	1,2,3,4,
	5,6,7,8,
	9,10,11,12
    };
    double x[4]={2,1,4,3};
    double y[3]={6,5,7};
    double z[3]={6,5,7};
    size_t i;
    my_dgemv(CblasRowMajor,CblasNoTrans,m,n,2.0,a,n,x,1,2.0,y,1);
    cblas_dgemv(CblasRowMajor,CblasNoTrans,m,n,2.0,a,n,x,1,2.0,z,1);
    for(i=0; i<m; i++){
	assert(y[i]==z[i]);
    }
}
void ReducedLinearForceModel::GetInternalForce(double * q, double * internalForces)
{
  CBLAS_ORDER     order= CblasColMajor;
  CBLAS_TRANSPOSE trans= CblasNoTrans;
  int m = r;
  int n = r;
  double alpha = 1;
  double * a = stiffnessMatrix;
  int lda = r;
  double * x = q;
  int incx = 1;
  double beta = 0;
  double * y = internalForces; 
  int incy = 1;

  cblas_dgemv(order, trans, m, n, alpha, a, lda, x, incx,
                  beta, y, incy);
}
示例#13
0
int main(int iArgCnt, char* sArrArgs[])
{
	int iIterationNo = 0;
	double dNormOfResult = 0;
	double dTime0 = 0, dTime1 = 0, dTimeDiff = 0, dMinTimeDiff = DBL_MAX, dMaxTimeDiff = 0;

	parseInputs(iArgCnt, sArrArgs);

	MPI_Init(&iArgCnt, &sArrArgs);
	MPI_Comm_size(MPI_COMM_WORLD, &GiProcessCnt);
	MPI_Comm_rank(MPI_COMM_WORLD, &GiProcessRank);

	initData();

	for(iIterationNo = 0; iIterationNo < GiIterationCnt; iIterationNo++)
	{
		MPI_Barrier(MPI_COMM_WORLD);
		dTime0 = MPI_Wtime();

		cblas_dgemv(CblasRowMajor, CblasNoTrans, GiRowCntForOneProc, GiVectorLength, 1.0, GdArrSubMatrix, GiVectorLength, GdArrVector, 1, 0.0, GdArrSubResult, 1);

		MPI_Barrier(MPI_COMM_WORLD);
		MPI_Gather(GdArrSubResult, GiRowCntForOneProc, MPI_DOUBLE, GdArrTotalResult, GiRowCntForOneProc, MPI_DOUBLE, 0, MPI_COMM_WORLD);

		dTime1 = MPI_Wtime();
		dTimeDiff = (dTime1 - dTime0);

		if(dTimeDiff > dMaxTimeDiff)
			dMaxTimeDiff = dTimeDiff;
		if(dTimeDiff < dMinTimeDiff)
			dMinTimeDiff = dTimeDiff;
	}

	if(GiProcessRank == 0)
	{
		dNormOfResult = cblas_dnrm2(GiVectorLength, GdArrTotalResult, 1);
		printf("Result=%f\nMin Time=%f uSec\nMax Time=%f uSec\n", dNormOfResult, (1.e6 * dMinTimeDiff), (1.e6 * dMaxTimeDiff));
	}

	MPI_Finalize();

	return 0;
}
示例#14
0
/** In place prediction of the next state mean and covariances
 */
void predict_forward(kf_t *kf)
{
  //TODO take advantage of sparsity in this function

  double x[kf->state_dim];
  memcpy(x, kf->state_mean, kf->state_dim * sizeof(double));

  //TODO make more efficient via the structure of the transition matrix
  cblas_dgemv(CblasRowMajor, CblasNoTrans, // CBLAS_ORDER, CBLAS_TRANSPOSE
              kf->state_dim, kf->state_dim, // int M, int N,
              1, (double *) kf->transition_mtx, kf->state_dim, // double 1, double *A, int lda
              x, 1, // double *X, int incX
              0, kf->state_mean, 1); // double beta, double *Y, int incY
  // VEC_PRINTF((double *) state_mean, kf->state_dim);

  double state_cov[kf->state_dim * kf->state_dim];
  reconstruct_udu(kf->state_dim, kf->state_cov_U, kf->state_cov_D, state_cov);
  // MAT_PRINTF((double *) state_cov, kf->state_dim, kf->state_dim);

  //TODO make more efficient via the structure of the transition matrix
  double FC[kf->state_dim * kf->state_dim];
  cblas_dsymm(CblasRowMajor, CblasRight, CblasUpper, //CBLAS_ORDER, CBLAS_SIDE, CBLAS_UPLO
              kf->state_dim, kf->state_dim, // int M, int N
              1, state_cov, kf->state_dim, // double alpha, double *A, int lda
              kf->transition_mtx, kf->state_dim, // double *B, int ldb
              0, FC, kf->state_dim); // double beta, double *C, int ldc
  // MAT_PRINTF((double *) FC, kf->state_dim, kf->state_dim);

  //TODO make more efficient via the structure of the transition matrix
  double FCF[kf->state_dim * kf->state_dim];
  memcpy(FCF, kf->transition_cov, kf->state_dim * kf->state_dim * sizeof(double));
  cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, // CBLAS_ORDER, CBLAS_TRANSPOSE transA, cBLAS_TRANSPOSE transB
              kf->state_dim, kf->state_dim, kf->state_dim, // int M, int N, int K
              1, FC, kf->state_dim, // double alpha, double *A, int lda
              kf->transition_mtx, kf->state_dim, //double *B, int ldb
              1, FCF, kf->state_dim); //beta, double *C, int ldc
  // MAT_PRINTF((double *) FCF, kf->state_dim, kf->state_dim);

  udu(kf->state_dim, FCF, kf->state_cov_U, kf->state_cov_D);
  // MAT_PRINTF((double *) state_cov_U, kf->state_dim, kf->state_dim);
  // VEC_PRINTF((double *) state_cov_D, kf->state_dim);
}
示例#15
0
static void FB_compute_H_mlcp(void* data_opaque, double* z, double* w, double* workV1, double* workV2, NumericsMatrix* H)
{
  printf("MLCP FB_compute_H_mlcp not implemented yet");
  exit(1);
#if 0
  MixedLinearComplementarityProblem* data = (MixedLinearComplementarityProblem *)data_opaque;
  unsigned int n = data->size;
  assert(data->M);
  assert(data->M->matrix0);
  double* M = data->M->matrix0;
  double normi;

  // workV1 = "z" in Facchibei--Pang p. 808
  // "z_i" = 1 if z_i = w_i = 0.0
  // M^T.workV1 --> workV2
  cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, M, n , workV1, 1, 0.0, workV2, 1);
  for (unsigned int i = 0; i < n; ++i)
  {
    if (workV1[i] != 0.0) // i in beta
    {
      normi = sqrt(workV1[i] * workV1[i] + workV2[i] * workV2[i]);
      for (unsigned int j = 0; j < n; j++)
      {
        H[j * n + i] = (workV2[i] / normi - 1.0) * M[j * n + i];
      }
      H[i * n + i] += (workV1[i] / normi - 1.0);

    }
    else // i not in beta
    {
      normi = sqrt(z[i] * z[i] + w[i] * w[i]);
      for (unsigned int j = 0; j < n; j++)
      {
        H[j * n + i] = (w[i] / normi - 1.0) * M[j * n + i];
      }
      H[i * n + i] += (z[i] / normi - 1.0);
    }

  }
#endif
}
示例#16
0
 void HMM::smoother(int iseq, double* c, double* beta, double* Z) {
     int si = o->sind[iseq];           // start index of the current sequence
     int sl = o->slen[iseq];           // length of the current sequence
     uint32_t* y_ = o->y + si;         // pointer to the current sequence
     
     double one = 1;
     
     // initialize: fill last frame of beta with ones
     vfill(&one, beta + (sl-1)*hs, 1, hs);
     
     for(int t = sl - 2; t > -1; t--) {
         double* out = beta + t*hs;
         vmul(beta + (t+1)*hs, 1, g + y_[t+1], os, Z, 1, hs);
         
         // using out as a temp vector because in cblas_dgemv, x cannot be y
         cblas_dgemv(CblasRowMajor, CblasNoTrans, hs, hs, 1, Q, hs,
                     Z, 1, 0, out, 1);
         
         vsdiv(out, 1, c + t+1, out, 1, hs);
     }
 }
void StVKReducedHessianTensor::ContractWithVector(int r, double * Hq, double * q, double * A)
{
  // computes A = Hq : q

  int quadraticSize = StVKReducedInternalForces::GetQuadraticSize(r);

  // multiply Hq and q
  cblas_dgemv(CblasColMajor, CblasTrans, 
       r, quadraticSize, 
       1.0,
       Hq, r,
       q, 1,
       0.0,
       A, 1);

  for(int j=r-1; j>=0; j--)
    for(int i=r-1; i>=j; i--)
    {
      int lowerTrianglePos = j * r - (j-1) * j / 2 + (i-j);
      A[ELT(r,i,j)] = A[lowerTrianglePos];
      A[ELT(r,j,i)] = A[lowerTrianglePos];
    }
}
示例#18
0
/* Numerics Matrix wrapper  for y <- alpha trans(A) x + beta y */
void NM_tgemv(const double alpha, NumericsMatrix* A, const double *x,
              const double beta, double *y)
{
    switch (A->storageType)
    {
    case NM_DENSE:
    {
        cblas_dgemv(CblasColMajor, CblasTrans, A->size0, A->size1,
                    alpha, A->matrix0, A->size0, x, 1, beta, y, 1);
        break;
    }
    case NM_SPARSE_BLOCK:
    case NM_SPARSE:
    {
        CHECK_RETURN(cs_aaxpy(alpha, NM_csc_trans(A), x, beta, y));
        break;
    }
    default:
    {
        assert(0 && "NM_tgemv unknown storageType");
    }
    }
}
示例#19
0
// computes means of the rows of A, subtracts them from A, and returns them in meanVec on the root process
// assumes memory has already been allocated for meanVec
void computeAndSubtractRowMeans(double *localRowChunk, double *meanVec, distMatrixInfo *matInfo) {
    int mpi_rank = matInfo->mpi_rank;
    int numcols = matInfo->numcols;
    int localrows = matInfo->localrows;
    int * rowcounts = matInfo->rowcounts;
    int * rowoffsets = matInfo->rowoffsets;
    MPI_Comm *comm = matInfo->comm;

    double *onesVec = (double *) malloc( numcols * sizeof(double));
    double *localMeanVec = (double *) malloc( localrows * sizeof(double));

    for(int idx = 0; idx < numcols; idx = idx + 1) {
        onesVec[idx]=1;
    }
    cblas_dgemv(CblasRowMajor, CblasNoTrans, localrows, numcols, 1.0/((double)numcols), localRowChunk, numcols, onesVec, 1, 0, localMeanVec, 1);
    cblas_dger(CblasRowMajor, localrows, numcols, -1.0, localMeanVec, 1, onesVec, 1, localRowChunk, numcols);
    if (mpi_rank != 0) {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0, *comm);
    } else {
        MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, meanVec, rowcounts, rowoffsets, MPI_DOUBLE, 0, *comm);
    }
    free(onesVec);
    free(localMeanVec);
}
示例#20
0
文件: la.c 项目: BrownRiceRice/amino
AA_API void aa_la_xlsnp( size_t m, size_t n,
                         const double *A, const double *A_star, const double *x,
                         const double *yp, double *y ) {
    aa_la_mvmul(n,m,A_star,x,y);

    double *B = (double*)aa_mem_region_local_alloc( sizeof(double) * n*n );

    // B = A^* A
    cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans,
                 (int)n, (int)n, (int)m,
                 1, A_star, (int)n, A, (int)m, 0, B, (int)n );

    // B =  A^* A - I
    for( size_t i = 0; i < n; i ++ )
        AA_MATREF(B,n,i,i) -= 1;

    // y = y + -B yp
    cblas_dgemv( CblasColMajor, CblasNoTrans, (int)n, (int)n,
                 -1.0, B, (int)n,
                 yp, 1,
                 1, y, 1 );

    aa_mem_region_local_pop( B );
}
示例#21
0
void plotMerit(double *z, double psi_k, double descentCondition)
{
  int incx = 1, incy = 1;
  double q_0, q_tk, qp_tk, merit_k;
  /* double tmin = 1e-12; */
  double tk = 1, aux;
  double m1 = 1e-4;
  double Nstep = 0;
  int i = 0;

  FILE *fp;

  (*sFphi)(sN, z, sphi_z, 0);
  aux = cblas_dnrm2(sN, sphi_z, 1);
  /* Computes merit function */
  aux = 0.5 * aux * aux;
  printf("plot psi_z %e\n", aux);


  if (!sPlotMerit)
    return;

  if (sPlotMerit)
  {
    /*    sPlotMerit=0;*/
    strcpy(fileName, "outputLS");


    (*sFphi)(sN, z, sphi_z, 0);
    q_0 =  cblas_dnrm2(sN, sphi_z , incx);
    q_0 = 0.5 * q_0 * q_0;

    fp = fopen(fileName, "w");

    /*    sPlotMerit=0;*/
    tk = 5e-7;
    aux = -tk;
    Nstep = 1e4;
    for (i = 0; i < 2 * Nstep; i++)
    {
      cblas_dcopy(sN, z, incx, sz2, incx);
      cblas_daxpy(sN , aux , sdir_descent , incx , sz2 , incy);
      (*sFphi)(sN, sz2, sphi_z, 0);
      q_tk =  cblas_dnrm2(sN, sphi_z , incx);
      q_tk = 0.5 * q_tk * q_tk;


      (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
      /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
      cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx);
      qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1);

      merit_k = psi_k + m1 * aux * descentCondition;


      fprintf(fp, "%e %.16e %.16e %e\n", aux, q_tk, merit_k, qp_tk);
      if (i == Nstep - 1)
        aux = 0;
      else
        aux += tk / Nstep;
    }

    fclose(fp);
  }
}
示例#22
0
int nonSmoothNewtonNeigh(int n, double* z, NewtonFunctionPtr* phi, NewtonFunctionPtr* jacobianPhi, int* iparam, double* dparam)
{


  int itermax = iparam[0]; // maximum number of iterations allowed
  int iterMaxWithSameZ = itermax / 4;
  int niter = 0; // current iteration number
  double tolerance = dparam[0];
  /*   double coef; */
  sFphi = phi;
  sFjacobianPhi = jacobianPhi;
  //  verbose=1;
  if (verbose > 0)
  {
    printf(" ============= Starting of Newton process =============\n");
    printf(" - tolerance: %14.7e\n - maximum number of iterations: %i\n", tolerance, itermax);
  }

  int incx = 1;
  /*   int n2 = n*n; */
  int infoDGESV;

  /** merit function and its jacobian */
  double psi_z;

  /** The algorithm is alg 4.1 of the paper of Kanzow and Kleinmichel, "A new class of semismooth Newton-type methods
      for nonlinear complementarity problems", in Computational Optimization and Applications, 11, 227-251 (1998).

      We try to keep the same notations
  */

  double rho = 1e-8;
  double descentCondition, criterion, norm_jacobian_psi_z, normPhi_z;
  double p = 2.1;
  double terminationCriterion = 1;
  double norm;
  int findNewZ, i, j, NbLookingForANewZ;
  /*   int naux=0; */
  double aux = 0;
  /*   double aux1=0; */
  int ii;
  int resls = 1;
  /*   char c; */
  /*  double * oldz; */
  /*  oldz=(double*)malloc(n*sizeof(double));*/

  NbLookingForANewZ = 0;

  /** Iterations ... */
  while ((niter < itermax) && (terminationCriterion > tolerance))
  {
    scmp++;
    ++niter;
    /** Computes phi and its jacobian */
    if (sZsol)
    {
      for (ii = 0; ii < sN; ii++)
        szzaux[ii] = sZsol[ii] - z[ii];
      printf("dist zzsol %.32e.\n", cblas_dnrm2(n, szzaux, 1));
    }

    (*sFphi)(n, z, sphi_z, 0);
    (*sFjacobianPhi)(n, z, sjacobianPhi_z, 1);
    /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, 1.0, sjacobianPhi_z, n, sphi_z, incx, 0.0, sgrad_psi_z, incx);
    norm_jacobian_psi_z = cblas_dnrm2(n, sgrad_psi_z, 1);

    /* Computes norm2(phi) */
    normPhi_z = cblas_dnrm2(n, sphi_z, 1);
    /* Computes merit function */
    psi_z = 0.5 * normPhi_z * normPhi_z;

    if (normPhi_z < tolerance)
    {
      /*it is the solution*/
      terminationCriterion = tolerance / 2.0;
      break;
    }

    if (verbose > 0)
    {
      printf("Non Smooth Newton, iteration number %i, norm grad psi= %14.7e , psi = %14.7e, normPhi = %e .\n", niter, norm_jacobian_psi_z, psi_z, normPhi_z);
      printf(" -----------------------------------------------------------------------\n");
    }

    NbLookingForANewZ++;

    if (niter > 2)
    {
      if (10 * norm_jacobian_psi_z < tolerance || !resls || NbLookingForANewZ > iterMaxWithSameZ)
      {
        NbLookingForANewZ = 0;
        resls = 1;
        /*   if (NbLookingForANewZ % 10 ==1 && 0){
          printf("Try NonMonotomnelineSearch\n");
          cblas_dcopy(n,sgrad_psi_z,1,sdir_descent,1);
          cblas_dscal( n , -1.0 ,sdir_descent,incx);
          NonMonotomnelineSearch( z,  phi, 10);
          continue;
        }
        */

        /* FOR DEBUG ONLY*/
        if (sZsol)
        {
          printf("begin plot prev dir\n");
          plotMerit(z, 0, 0);
          printf("end\n");
          /*     gets(&c);*/
          (*sFphi)(n, sZsol, szaux, 0);
          printf("value psi(zsol)=%e\n", cblas_dnrm2(n, szaux, 1));
          cblas_dcopy(n, sZsol, incx, szaux, incx);
          cblas_daxpy(n , -1 , z , 1 , szaux , 1);
          printf("dist to sol %e \n", cblas_dnrm2(n, szaux, 1));
          for (ii = 0; ii < n; ii++)
            sdir_descent[ii] = sZsol[ii] - z[ii];

          aux = norm;
          norm = 1;
          printf("begin plot zzsol dir\n");
          plotMerit(z, 0, 0);
          printf("end\n");
          /*     gets(&c);*/
          norm = aux;
        }

        printf("looking for a new Z...\n");
        /*may be a local minimal*/
        /*find a gradiant going out of this cul-de-sac.*/
        norm = n / 2;
        findNewZ = 0;
        for (j = 0; j < 20; j++)
        {

          for (i = 0; i < n; i++)
          {
            if (sZsol)
            {
              /* FOR DEBUG ONLY*/
              (*sFphi)(n, sZsol, sphi_zaux, 0);
              norm = cblas_dnrm2(n, sphi_zaux, 1);
              printf("Norm of the sol %e.\n", norm);

              for (ii = 0; ii < n; ii++)
                sdir_descent[ii] = sZsol[ii] - z[ii];
              norm = 1;
            }
            else
            {
              for (ii = 0; ii < n; ii++)
              {
                sdir_descent[ii] = 1.0 * rand();
              }
              cblas_dscal(n, 1 / cblas_dnrm2(n, sdir_descent, 1), sdir_descent, incx);
              cblas_dscal(n, norm, sdir_descent, incx);
            }
            cblas_dcopy(n, z, incx, szaux, incx);
            // cblas_dscal(n,0.0,zaux,incx);
            /* zaux = z + dir */
            cblas_daxpy(n , norm , sdir_descent , 1 , szaux , 1);
            /* Computes the jacobian of the merit function, jacobian_psi_zaux = transpose(jacobianPhi_zaux).phi_zaux */
            (*sFphi)(n, szaux, sphi_zaux, 0);
            (*sFjacobianPhi)(n, szaux, sjacobianPhi_zaux, 1);

            /* FOR DEBUG ONLY*/
            if (sZsol)
            {
              aux = cblas_dnrm2(n, sphi_zaux, 1);
              printf("Norm of the sol is now %e.\n", aux);
              for (ii = 0; ii < n; ii++)
                printf("zsol %e zaux %e \n", sZsol[ii], szaux[ii]);
            }


            cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, sjacobianPhi_zaux, n, sphi_zaux, incx, 0.0, sgrad_psi_zaux, incx);
            cblas_dcopy(n, szaux, 1, szzaux, 1);
            cblas_daxpy(n , -1 , z , incx , szzaux , incx);
            /*zzaux must be a descente direction.*/
            /*ie jacobian_psi_zaux.zzaux <0
            printf("jacobian_psi_zaux : \n");*/
            /*cblas_dcopy(n,sdir,incx,sdir_descent,incx);
            plotMerit(z, phi);*/


            aux = cblas_ddot(n, sgrad_psi_zaux, 1, szzaux, 1);
            /*       aux1 = cblas_dnrm2(n,szzaux,1);
            aux1 = cblas_dnrm2(n,sgrad_psi_zaux,1);*/
            aux = aux / (cblas_dnrm2(n, szzaux, 1) * cblas_dnrm2(n, sgrad_psi_zaux, 1));
            /*       printf("aux: %e\n",aux);*/
            if (aux < 0.1 * (j + 1))
            {
              //zaux is the new point.
              findNewZ = 1;
              cblas_dcopy(n, szaux, incx, z, incx);
              break;
            }
          }
          if (findNewZ)
            break;
          if (j == 10)
          {
            norm = n / 2;
          }
          else if (j > 10)
            norm = -2 * norm;
          else
            norm = -norm / 2.0;
        }
        if (! findNewZ)
        {
          printf("failed to find a new z\n");
          /* exit(1);*/
          continue;

        }
        else
          continue;
      }
    }

    /* Stops if the termination criterion is satisfied */
    terminationCriterion = norm_jacobian_psi_z;
    /*      if(terminationCriterion < tolerance){
    break;
    }*/

    /* Search direction calculation
    Find a solution dk of jacobianPhiMatrix.d = -phiVector.
    dk is saved in phiVector.
    */
    cblas_dscal(n , -1.0 , sphi_z, incx);
    DGESV(n, 1, sjacobianPhi_z, n, sipiv, sphi_z, n, &infoDGESV);
    if (infoDGESV)
    {
      printf("DGEV error %d.\n", infoDGESV);
    }
    cblas_dcopy(n, sphi_z, 1, sdir_descent, 1);
    criterion = cblas_dnrm2(n, sdir_descent, 1);
    /*      printf("norm dir descent %e\n",criterion);*/

    /*printf("begin plot descent dir\n");
    plotMerit(z, phi);
    printf("end\n");
          gets(&c);*/

    /*printf("begin plot zzsol dir\n");
    plotMeritToZsol(z,phi);
    printf("end\n");
          gets(&c);*/


    /*
    norm = cblas_dnrm2(n,sdir_descent,1);
    printf("norm desc %e \n",norm);
    cblas_dscal( n , 1/norm , sdir_descent, 1);
    */
    /* descentCondition = jacobian_psi.dk */
    descentCondition = cblas_ddot(n, sgrad_psi_z,  1,  sdir_descent, 1);

    /* Criterion to be satisfied: error < -rho*norm(dk)^p */
    criterion = -rho * pow(criterion, p);
    /*      printf("ddddddd %d\n",scmp);
    if (scmp>100){
    displayMat(sjacobianPhi_z,n,n,n);
    exit(1);
    }*/

//    if ((infoDGESV != 0 || descentCondition > criterion) && 0)
//    {
//      printf("no a desc dir, get grad psy\n");
      /* dk = - jacobian_psi (remind that dk is saved in phi_z) */
//      cblas_dcopy(n, sgrad_psi_z, 1, sdir_descent, 1);
//      cblas_dscal(n , -1.0 , sdir_descent, incx);
      /*DEBUG ONLY*/
      /*printf("begin plot new descent dir\n");
      plotMerit(z);
      printf("end\n");
       gets(&c);*/
//    }
    /*      coef=fabs(norm_jacobian_psi_z*norm_jacobian_psi_z/descentCondition);
    if (coef <1){
    cblas_dscal(n,coef,sdir_descent,incx);
    printf("coef %e norm dir descent is now %e\n",coef,cblas_dnrm2(n,sdir_descent,1));
    }*/


    /* Step-3 Line search: computes z_k+1 */
    /*linesearch_Armijo(n,z,sdir_descent,psi_z, descentCondition, phi);*/
    /*            if (niter == 10){
    printf("begin plot new descent dir\n");
    plotMerit(z);
    printf("end\n");
     gets(&c);
    }*/
    /*      memcpy(oldz,z,n*sizeof(double));*/

    resls = linesearch2_Armijo(n, z, psi_z, descentCondition);
    if (!resls && niter > 1)
    {

      /* displayMat(sjacobianPhi_z,n,n,n);
      printf("begin plot new descent dir\n");
      plotMerit(oldz,psi_z, descentCondition);
      printf("end\n");
      gets(&c);*/
    }


    /*      lineSearch_Wolfe(z, descentCondition, phi,jacobianPhi);*/
    /*      if (niter>3){
    printf("angle between prev dir %e.\n",acos(cblas_ddot(n, sdir_descent,  1,  sPrevDirDescent, 1)/(cblas_dnrm2(n,sdir_descent,1)*cblas_dnrm2(n,sPrevDirDescent,1))));
    }*/
    cblas_dcopy(n, sdir_descent, 1, sPrevDirDescent, 1);

    /*      for (j=20;j<32;j++){
    if (z[j]<0)
    z[j]=0;
    }*/

    /*      if( 1 || verbose>0)
    {
     printf("Non Smooth Newton, iteration number %i, error grad equal to %14.7e , psi value is %14.7e .\n",niter, terminationCriterion,psi_z);
       printf(" -----------------------------------------------------------------------\n");
       }*/
  }

  /* Total number of iterations */
  iparam[1] = niter;
  /* Final error */
  dparam[1] = terminationCriterion;

  /** Free memory*/

  if (verbose > 0)
  {
    if (dparam[1] > tolerance)
      printf("Non Smooth Newton warning: no convergence after %i iterations\n" , niter);

    else
      printf("Non Smooth Newton: convergence after %i iterations\n" , niter);
    printf(" The residue is : %e \n", dparam[1]);
  }

  /*  free(oldz);*/

  if (dparam[1] > tolerance)
    return 1;
  else return 0;
}
示例#23
0
int lineSearch_Wolfe(double *z, double qp_0)
{
  int incx = 1, incy = 1;
  double q_0, q_tk, qp_tk;
  double tmin = 1e-12;
  int maxiter = 100;
  int niter = 0;
  double tk = 1;
  double tg, td;
  double m1 = 0.1;
  double m2 = 0.9;


  (*sFphi)(sN, z, sphi_z, 0);
  q_0 =  cblas_dnrm2(sN, sphi_z , incx);
  q_0 = 0.5 * q_0 * q_0;

  tg = 0;
  td = 10e5;

  tk = (tg + td) / 2.0;

  while (niter < maxiter || (td - tg) < tmin)
  {
    niter++;
    /*q_tk = 0.5*|| phi(z+tk*d) ||*/
    cblas_dcopy(sN, z, incx, sz2, incx);
    cblas_daxpy(sN , tk , sdir_descent , incx , sz2 , incy);
    (*sFphi)(sN, sz2, sphi_z, 0);
    q_tk =  cblas_dnrm2(sN, sphi_z , incx);
    q_tk = 0.5 * q_tk * q_tk;

    (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
    /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
    cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx);
    qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1);
    if (qp_tk <  m2 * qp_0 && q_tk < q_0 + m1 * tk * qp_0)
    {
      /*too small*/
      if (niter == 1)
        break;
      tg = tk;
      tk = (tg + td) / 2.0;
      continue;
    }
    else if (q_tk > q_0 + m1 * tk * qp_0)
    {
      /*too big*/
      td = tk;
      tk = (tg + td) / 2.0;
      continue;
    }
    else
      break;
  }

  cblas_dcopy(sN, sz2, incx, z, incx);

  if ((td - tg) <= tmin)
  {
    printf("NonSmoothNewton2::lineSearchWolfe warning, resulting tk < tmin, linesearch stopped.\n");
    return 0;
  }
  return 1;

}
示例#24
0
/* Linesearch */
int linesearch2_Armijo(int n, double *z, double psi_k, double descentCondition)
{

  /* IN :
     psi_k (merit function for current iteration)
     jacobian_psi_k (jacobian of the merit function)
     dk: descent direction

     OUT: tk, z
  */

  double m1 = 0.1;
  double tk = 1;
  double tkl, tkr, tkaux;
  int incx = 1, incy = 1;
  double merit, merit_k;
  double tmin = 1e-14;
  double qp_tk;

  /*  cblas_dcopy(sN, z, incx,sz2,incx);*/

  /* z1 = z0 + dir */
  /*  cblas_daxpy(n , 1.0 , sdir_descent , incx , z , incy );*/

  tk = 3.25;


  while (tk > tmin)
  {

    /* Computes merit function = 1/2*norm(phi(z_{k+1}))^2 */
    cblas_dcopy(sN, z, incx, sz2, incx);
    cblas_daxpy(n , tk , sdir_descent , incx , sz2 , incy);


    (*sFphi)(n, sz2, sphi_z, 0);
    merit =  cblas_dnrm2(n, sphi_z , incx);
    merit = 0.5 * merit * merit;
    merit_k = psi_k + m1 * tk * descentCondition;
    if (merit < merit_k)
    {
      tkl = 0;
      tkr = tk;

      /*calcul merit'(tk)*/
      (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
      /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
      cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx);
      qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1);

      if (qp_tk > 0)
      {
        while (fabs(tkl - tkr) > tmin)
        {
          tkaux = 0.5 * (tkl + tkr);
          cblas_dcopy(sN, z, incx, sz2, incx);
          cblas_daxpy(n , tkaux , sdir_descent , incx , sz2 , incy);
          /*calcul merit'(tk)*/
          (*sFphi)(n, sz2, sphi_z, 0);
          (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
          /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
          cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx);
          qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1);
          if (qp_tk > 0)
          {
            tkr = tkaux;
          }
          else
          {
            tkl = tkaux;
          }
        }
      }

      /* printf("merit = %e, merit_k=%e,tk= %e,tkaux=%e \n",merit,merit_k,tk,tkaux);*/
      cblas_dcopy(sN, sz2, incx, z, incx);
      break;
    }
    tk = tk * 0.5;
  }
  if (tk <= tmin)
  {
    cblas_dcopy(sN, sz2, incx, z, incx);
    printf("NonSmoothNewton::linesearch2_Armijo warning, resulting tk=%e < tmin, linesearch stopped.\n", tk);
    return 0;

  }
  return 1;

}
示例#25
0
void caffe_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M,
                            const int N, const double alpha, const double* A, const double* x,
                            const double beta, double* y) {
    cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
}
示例#26
0
/*
 * (input) double *z : size n+m
 * (output)double *w : size n+m
 *
 *
 */
int mlcp_compute_error(MixedLinearComplementarityProblem* problem, double *z, double *w, double tolerance, double * error)
{
  /* Checks inputs */
  if (problem == NULL || z == NULL || w == NULL)
    numerics_error("mlcp_compute_error", "null input for problem and/or z and/or w");

  int param = 1;
  int NbLines = problem->M->size0; /* Equalities */
  int n = problem->n; /* Equalities */
  int m = problem->m; /* Inequalities */
  int incx = 1, incy = 1;

  /* Computation of w: depends on the way the problem is written */

  /* Problem in the form (M,q) */
  if (problem->isStorageType1)
  {
    if (problem->M == NULL)
      numerics_error("mlcp_compute_error", "null input for M");

    /* Computes w = Mz + q */
    cblas_dcopy(NbLines , problem->q , incx , w , incy);
    prodNumericsMatrix(problem->M->size1, problem->M->size0, 1.0, problem->M, z, 1.0, w);

  }
  /* Problem in the form ABCD */
  else //if (problem->isStorageType2)
  {



    /* Checks inputs */
    if (problem->A == NULL || problem->B == NULL || problem->C == NULL  || problem->D == NULL)
    {
      numerics_error("mlcp_compute_error: ", "null input for A, B, C or D");
    }

    /* Links to problem data */
    double *a = &problem->q[0];
    double *b = &problem->q[NbLines - m];
    double *A = problem->A;
    double *B = problem->B;
    double *C = problem->C;
    double *D = problem->D;

    /* Compute "equalities" part, we = Au + Cv + a - Must be equal to 0 */
    cblas_dcopy(NbLines - m , a , incx , w , incy); //  we = w[0..n-1] <-- a
    cblas_dgemv(CblasColMajor,CblasNoTrans , NbLines - m, n , 1.0 , A , NbLines - m , &z[0] , incx , 1.0 , w , incy); // we <-- A*u + we
    cblas_dgemv(CblasColMajor,CblasNoTrans , NbLines - m, m , 1.0 , C , NbLines - m , &z[n] , incx , 1.0 , w , incy); // we <-- C*v + we

    /* Computes part which corresponds to complementarity */
    double * pwi = w + NbLines - m; // No copy!!
    cblas_dcopy(m , b , incx , pwi , incy); //  wi = w[n..m] <-- b
    // following int param, we recompute the product wi = Du+BV +b and we = Au+CV +a
    // The test is then more severe if we compute w because it checks that the linear equation is satisfied
    if (param == 1)
    {
      cblas_dgemv(CblasColMajor,CblasNoTrans , m, n , 1.0 , D , m , &z[0] , incx , 1.0 , pwi , incy);   // wi <-- D*u+ wi
      cblas_dgemv(CblasColMajor,CblasNoTrans , m , m , 1.0 , B , m , &z[n] , incx , 1.0 , pwi , incy);  // wi <-- B*v + wi
    }
  }

  /* Error on equalities part */
  double error_e = 0;
  /* Checks complementarity (only for rows number n to size) */
  double error_i = 0.;
  double zi, wi;
  double *q = problem->q;
  double norm_e = 1;
  double norm_i = 1;
  if (problem->blocksRows)
  {
    int numBlock = 0;
    while (problem->blocksRows[numBlock] < n + m)
    {
      if (!problem->blocksIsComp[numBlock])
      {
        error_e += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], w + problem->blocksRows[numBlock] , incx);
        norm_e += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], q + problem->blocksRows[numBlock] , incx);
      }
      else
      {
        for (int numLine = problem->blocksRows[numBlock]; numLine < problem->blocksRows[numBlock + 1] ; numLine++)
        {
          zi = z[numLine];
          wi = w[numLine];
          if (zi < 0.0)
          {
            error_i += -zi;
            if (wi < 0.0) error_i += zi * wi;
          }
          if (wi < 0.0) error_i += -wi;
          if ((zi > 0.0) && (wi > 0.0)) error_i += zi * wi;
        }
        norm_i += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], w + problem->blocksRows[numBlock] , incx);
      }
      numBlock++;
    }
  }
  else
  {
    printf("WARNING, DEPRECATED MLCP API\n");
    /* Error on equalities part */
    error_e = cblas_dnrm2(NbLines - m , w , incx);;

    /* Checks complementarity (only for rows number n to size) */
    error_i = 0.;

    for (int i = 0 ; i < m ; i++)
    {
      zi = z[n + i];
      wi = w[(NbLines - m) + i];
      if (zi < 0.0)
      {
        error_i += -zi;
        if (wi < 0.0) error_i += zi * wi;
      }
      if (wi < 0.0) error_i += -wi;
      if ((zi > 0.0) && (wi > 0.0)) error_i += zi * wi;
    }


    /* Computes error */
    norm_i += cblas_dnrm2(m , q + NbLines - m , incx);
    norm_e += cblas_dnrm2(NbLines - m , q , incx);
  }

  if (error_i / norm_i >= error_e / norm_e)
  {
    *error = error_i / (1.0 + norm_i);
  }
  else
  {
    *error = error_e / (1.0 + norm_e);
  }

  if (*error > tolerance)
  {
    /*if (isVerbose > 0) printf(" Numerics - mlcp_compute_error failed: error = %g > tolerance = %g.\n",*error, tolerance);*/
    if (verbose)
      printf(" Numerics - mlcp_compute_error failed: error = %g > tolerance = %g.\n", *error, tolerance);
    /* displayMLCP(problem);*/
    return 1;
  }
  else
  {
    if (verbose > 0) printf("Siconos/Numerics: mlcp_compute_error: Error evaluation = %g \n", *error);
    return 0;
  }
}
示例#27
0
void FrictionContact2D_latin(FrictionContactProblem* problem , double *reaction , double *velocity , int *info, SolverOptions* options)
{
  int nc = problem->numberOfContacts;
  assert(nc>0);
  double * vec = problem->M->matrix0;
  double *qq = problem->q;
  double * mu = problem->mu;



  int info77 = 0;
  int i, j, kk, iter1, ino, ddl, nrhs;
  int info2 = 0;
  int n = 2 * nc;
  size_t idim, nbno;
  int incx = 1, incy = 1;
  size_t taille, taillet, taillen, itt;
  int *ddln;
  int *ddlt, *vectnt;
  assert(n>0);

  double  errmax, alpha, beta, maxa, k_latin;
  double  aa, nt, wn, tc, zc0;
  double  err1, num11, err0;
  double  den11, den22, knz0, ktz0, *ktz, *wf;
  double  *wc, *zc, *wt, *maxwt, *wnum1, *znum1;
  double  *zt, *maxzt;

  double  *kn, *kt;

  // char    trans='T', diag='N';
  // char    uplo='U', notrans='N';



  double  *k, *DPO, *kf, *kninv;
  double  *kinvwden1, *kzden1, *kfinv, *knz, *wtnc;



  /*                Recup input                    */


  itt     = options->iparam[0];
  errmax  = options->dparam[0];
  k_latin = options->dparam[2];

  /*               Initialize output                */


  options->iparam[1] = 0;
  options->dparam[1] = 0.0;


  /*               Allocations                      */

  k         = (double*) malloc(n * n * sizeof(double));
  DPO       = (double*) malloc(n * n * sizeof(double));
  kf        = (double*) malloc(n * n * sizeof(double));
  kfinv     = (double*) malloc(n * n * sizeof(double));

  kninv     = (double*) malloc(nc * nc * sizeof(double));
  kn        = (double*) malloc(nc * nc * sizeof(double));
  kt        = (double*) malloc(nc * nc * sizeof(double));

  kinvwden1 = (double*) malloc(n  * sizeof(double));
  kzden1    = (double*) malloc(n  * sizeof(double));
  wc        = (double*) malloc(n  * sizeof(double));
  zc        = (double*) malloc(n  * sizeof(double));
  znum1     = (double*) malloc(n  * sizeof(double));
  wnum1     = (double*) malloc(n  * sizeof(double));
  wt        = (double*) malloc(n  * sizeof(double));
  maxzt     = (double*) malloc(n  * sizeof(double));



  knz       = (double*) malloc(nc * sizeof(double));
  wtnc      = (double*) malloc(nc * sizeof(double));
  ktz       = (double*) malloc(nc * sizeof(double));
  wf        = (double*) malloc(nc * sizeof(double));
  maxwt     = (double*) malloc(nc * sizeof(double));
  zt        = (double*) malloc(nc * sizeof(double));


  vectnt    = (int*) malloc(n * sizeof(int));

  ddln      = (int*) malloc(nc * sizeof(int));
  ddlt      = (int*) malloc(nc * sizeof(int));

  /*                    Initialization                   */



  for (i = 0; i < n * n; i++)
  {
    k[i]     = 0.;
    kf[i]    = 0.;
    kfinv[i] = 0.;

    if (i < nc * nc)
    {

      kn[i]    = 0.0;
      kt[i]    = 0.0;
      kninv[i] = 0.0;


      if (i < n)
      {
        wc[i]    = 0.0;
        zc[i]    = 0.;
        reaction[i]     = 0.;
        velocity[i]     = 0.;
        znum1[i] = 0.;
        wnum1[i] = 0.;
        wt[i]    = 0.;
        maxzt[i] = 0.;

        if (i < nc)
        {
          maxwt[i] = 0.;
          zt[i]    = 0.;
          knz[i]   = 0.;
          ktz[i]   = 0.;
          wf[i]    = 0.;
          wtnc[i]  = 0.;
        }

      }

    }
  }


  for (i = 0; i < n; i++)
  {

    if (fabs(vec[i * n + i]) < DBL_EPSILON)
    {

      if (verbose > 0)
        printf("\n Warning nul diagonal term in M matrix \n");

      free(k);
      free(DPO);
      free(kf);
      free(kfinv);
      free(kninv);
      free(kn);
      free(kt);
      free(kinvwden1);
      free(kzden1);
      free(wc);
      free(zc);
      free(znum1);
      free(wnum1);
      free(wt);
      free(maxzt);
      free(knz);
      free(wtnc);
      free(ktz);
      free(wf);
      free(maxwt);
      free(zt);
      free(vectnt);
      free(ddln);
      free(ddlt);

      *info = 3;

      return;


    }
    else
    {

      k[i + n * i] = k_latin / vec[i * n + i];
      vectnt[i] = i + 1;

    }

  }


  for (i = 0; i < nc; i++)
  {
    ddln[i] = vectnt[2 * i];
    if (i != 0) ddlt[i] = vectnt[2 * i - 1];
    else ddlt[i] = 0;

  }


  for (i = 0; i < nc; i++)
  {
    kn[i + nc * i] = k[ddln[i] + n * ddln[i]];
    kt[i + nc * i] = k[ddlt[i] + n * ddlt[i]];
  }




  taillen = sizeof(ddln) / sizeof(ddln[0]);
  taillet = sizeof(ddlt) / sizeof(ddlt[0]);

  idim = 1 +  taillen / taillet;

  taille = 0;
  for (i = 0; i < n; i++)
    taille = sizeof(qq[i]) + taille;

  taille = taille / sizeof(qq[0]);
  nbno = taille / idim;


  for (i = 0; i < nc; i++)
  {
    kf[ddln[i] + n * ddln[i]] = kn[i + nc * i];
    kf[ddlt[i] + n * ddlt[i]] = kt[i + nc * i];
  }


  for (i = 0; i < n; i++)
  {
    kfinv[i + n * i] = 1. / kf[i + n * i];

    if (i < nc)
      kninv[i + nc * i] = 1. / kt[i + nc * i];

  }


  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      DPO[i + n * j] = vec[j * n + i] + kfinv[i + n * j];



  DPOTRF(LA_UP, n, DPO , n, &info2);

  if (info2 != 0)
  {
    if (verbose > 0)
      printf("\n Matter with Cholesky factorization \n");

    free(k);
    free(DPO);
    free(kf);
    free(kfinv);
    free(kninv);
    free(kn);
    free(kt);
    free(kinvwden1);
    free(kzden1);
    free(wc);
    free(zc);
    free(znum1);
    free(wnum1);
    free(wt);
    free(maxzt);
    free(knz);
    free(wtnc);
    free(ktz);
    free(wf);
    free(maxwt);
    free(zt);
    free(vectnt);
    free(ddln);
    free(ddlt);

    *info = 2;
    return;
  }

  /*                Iteration loops                  */


  iter1 = 0;
  err1  = 1.;



  while ((iter1 < itt) && (err1 > errmax))
  {

    /*               Linear stage (zc,wc) -> (z,w)         */

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, zc, incx, beta, wc, incy);

    cblas_dcopy(n, qq, incx, znum1, incy);

    alpha = -1.;
    cblas_dscal(n , alpha , znum1 , incx);

    alpha = 1.;
    cblas_daxpy(n, alpha, wc, incx, znum1, incy);

    nrhs = 1;
    DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77);

    DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77);

    cblas_dcopy(n, znum1, incx, reaction, incy);

    alpha = -1.;
    beta = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, reaction, incx, beta, wc, incy);

    cblas_dcopy(n, wc, incx, velocity, incy);



    /*               Local stage (z,w)->(zc,wc)          */


    for (i = 0; i < n; i++)
    {
      zc[i] = 0.;
      wc[i] = 0.0;
    }


    /*          Normal party                           */



    for (i = 0; i < nc; i++)
    {
      knz0 = 0.;
      for (kk = 0; kk < nc; kk++)
      {
        knz[i] = kt[i + nc * kk] * velocity[ddlt[kk]] + knz0;
        knz0 = knz[i];
      }

      zt[i] = reaction[ddlt[i]] - knz[i];

      if (zt[i] > 0.0)
      {
        zc[ddlt[i]] = zt[i];
        maxzt[i] = 0.0;
      }
      else
      {
        zc[ddlt[i]] = 0.0;
        maxzt[i] = -zt[i];
      }
    }

    for (i = 0; i < nc; i++)
    {
      zc0 = 0.;
      ktz0 = 0.;
      for (j = 0; j < nc; j++)
      {
        wc[ddlt[i]] = kninv[i + nc * j] * maxzt[j] + zc0;
        zc0 = wc[ddlt[i]];
        ktz[i] = kn[i + nc * j] * velocity[ddln[j]] + ktz0;
        ktz0 =  ktz[i];
      }
      wf[i] = reaction[ddln[i]] - ktz[i];
    }


    /*             Loop other nodes              */


    for (ino = 0; ino < nbno; ino++)
    {
      ddl  = ddln[ino];
      nt   = fabs(wf[ino]);


      /*          Tangential vector              */



      if (nt < 1.e-8) tc = 0.;
      else tc = wf[ino] / nt;



      /*               Tangentiel component             */


      wn = zc[ddlt[ino]];

      aa = nt - mu[ino] * wn;

      if (aa > 0.0)
      {
        maxa = aa;
      }
      else
      {
        maxa = 0.0;
      }

      wc[ddl] = (maxa / (-1 * kn[ino + nc * ino])) * tc;

      aa = -nt + mu[ino] * wn;

      if (aa > 0.0)
      {
        maxa = aa;
      }
      else
      {
        maxa = 0.0;
      }

      zc[ddl] = (mu[ino] * wn - maxa) * tc;

    }

    /*               Convergence criterium                */



    cblas_dcopy(n, reaction, incx, znum1, incy);

    alpha = -1.;
    cblas_daxpy(n, alpha, zc, incx, znum1, incy);

    cblas_dcopy(n, velocity, incx, wnum1, incy);

    cblas_daxpy(n, alpha, wc, incx, wnum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wnum1, incx, beta, znum1, incy);

    num11  = 0.;
    alpha  = 1.;
    beta = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    num11 = cblas_ddot(n, wnum1, incx, znum1, incy);

    cblas_dcopy(n, reaction, incx, znum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, velocity, incx, beta, znum1, incy);

    alpha  = 1.;
    beta   = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    den11  = cblas_ddot(n, wnum1, incx, znum1, incy);

    cblas_dcopy(n, zc, incx, znum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wc, incx, beta, znum1, incy);

    alpha  = 1.;
    beta   = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    den22  = cblas_ddot(n, znum1, incx, wnum1, incy);

    err0   = num11 / (den11 + den22);

    err1   = sqrt(err0);

    options->iparam[1] = iter1;
    options->dparam[1] = err1;

    iter1   = iter1 + 1;


  }


  if (err1 > errmax)
  {

    if (verbose > 0)
      printf("No convergence after %d iterations, the residue is %g\n", iter1, err1);

    *info = 1;
  }
  else
  {

    if (verbose > 0)
      printf("Convergence after %d iterations, the residue is %g \n", iter1, err1);

    *info = 0;
  }

  free(k);
  free(DPO);
  free(kf);
  free(kfinv);
  free(kninv);
  free(kn);
  free(kt);
  free(kinvwden1);
  free(kzden1);
  free(wc);
  free(zc);
  free(znum1);
  free(wnum1);
  free(wt);
  free(maxzt);
  free(knz);
  free(wtnc);
  free(ktz);
  free(wf);
  free(maxwt);
  free(zt);
  free(vectnt);
  free(ddln);
  free(ddlt);



}
示例#28
0
int main ( )
{
   CBLAS_LAYOUT Layout;
   CBLAS_TRANSPOSE transa;

   double *a, *x, *y;
   double alpha, beta;
   int m, n, lda, incx, incy, i;

   Layout = CblasColMajor;
   transa = CblasNoTrans;

   m = 4; /* Size of Column ( the number of rows ) */
   n = 4; /* Size of Row ( the number of columns ) */
   lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */
   incx = 1;
   incy = 1;
   alpha = 1;
   beta = 0;

   a = (double *)malloc(sizeof(double)*m*n);
   x = (double *)malloc(sizeof(double)*n);
   y = (double *)malloc(sizeof(double)*n);
   /* The elements of the first column */
   a[0] = 1;
   a[1] = 2;
   a[2] = 3;
   a[3] = 4;
   /* The elements of the second column */
   a[m] = 1;
   a[m+1] = 1;
   a[m+2] = 1;
   a[m+3] = 1;
   /* The elements of the third column */
   a[m*2] = 3;
   a[m*2+1] = 4;
   a[m*2+2] = 5;
   a[m*2+3] = 6;
   /* The elements of the fourth column */
   a[m*3] = 5;
   a[m*3+1] = 6;
   a[m*3+2] = 7;
   a[m*3+3] = 8;
   /* The elemetns of x and y */
   x[0] = 1;
   x[1] = 2;
   x[2] = 1;
   x[3] = 1;
   y[0] = 0;
   y[1] = 0;
   y[2] = 0;
   y[3] = 0;

   cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta,
                y, incy );
   /* Print y */
   for( i = 0; i < n; i++ )
      printf(" y%d = %f\n", i, y[i]);
   free(a);
   free(x);
   free(y);
   return 0;
}
示例#29
0
/*
 * The equalities are eliminated.
 *
 *0=(Me_1 Me_2)(Re Ri)' + Qe
 *Vi=(Mi_1 Mi_2)(Re Ri)' + Qi
 *
 *Re=-Me_1^{-1}(Me_2Ri+Qe)
 *
 *Vi=(Mi_2-Mi_1 Me_1^{-1} Me_2)Ri+Qi-Mi1 Me_1^{-1} Qe
 *
 */
void GMPReducedSolve(GenericMechanicalProblem* pInProblem, double *reaction , double *velocity, int * info, SolverOptions* options, NumericsOptions* numerics_options)
{

  SparseBlockStructuredMatrix* m = pInProblem->M->matrix1;
  int nbRow = m->blocksize0[m->blocknumber0 - 1];
  int nbCol = m->blocksize1[m->blocknumber1 - 1];
  double *Me = (double *) malloc(nbRow * nbCol * sizeof(double));
  double *Qe = (double *) malloc(nbRow * sizeof(double));
  double *Mi = (double *) malloc(nbRow * nbCol * sizeof(double));
  double *Qi = (double *) malloc(nbRow * sizeof(double));
  int Me_size;
  int Mi_size;
  buildReducedGMP(pInProblem, Me, Mi, Qe, Qi, &Me_size, &Mi_size);

  if ((Me_size == 0 || Mi_size == 0))
  {
    genericMechanicalProblem_GS(pInProblem, reaction, velocity, info, options, numerics_options);
    free(Me);
    free(Qe);
    free(Mi);
    free(Qi);
    return;
  }

  double * pseduInvMe1 = (double *)malloc(Me_size * Me_size * sizeof(double));
  memcpy(pseduInvMe1, Me, Me_size * Me_size * sizeof(double));
  pinv(pseduInvMe1, Me_size, Me_size, 1e-16);
  double *Mi2 = Mi + Mi_size * Me_size;
  double *Mi1 = Mi;
  double *Me2 = Me + Me_size * Me_size;
#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  double *Me1 = Me;
  FILE * titi  = fopen("buildReducedGMP_output.txt", "w");
  printf("GMPReducedsolve\n");
  printDenseMatrice("Me1", titi, Me1, Me_size, Me_size);
  printDenseMatrice("Me2", titi, Me2, Me_size, Mi_size);
  printDenseMatrice("Mi1", titi, Mi1, Mi_size, Me_size);
  printDenseMatrice("Mi2", titi, Mi2, Mi_size, Mi_size);
  printDenseMatrice("Qe", titi, Qe, Me_size, 1);
  printDenseMatrice("Qi", titi, Qi, Mi_size, 1);
  printDenseMatrice("Me1inv", titi, pseduInvMe1, Me_size, Me_size);
#endif


  double * reducedProb = (double *)malloc(Mi_size * Mi_size * sizeof(double));
  memcpy(reducedProb, Mi2, Mi_size * Mi_size * sizeof(double));

  double * Mi1pseduInvMe1 = (double *)malloc(Mi_size * Me_size * sizeof(double));
  cblas_dgemm(CblasColMajor,CblasNoTrans, CblasNoTrans, Mi_size, Me_size, Me_size, -1.0, Mi1, Mi_size, pseduInvMe1, Me_size, 0.0, Mi1pseduInvMe1, Mi_size);
#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  printDenseMatrice("minusMi1pseduInvMe1", titi, Mi1pseduInvMe1, Mi_size, Me_size);
  fprintf(titi, "_minusMi1pseduInvMe1=-Mi1*Me1inv;\n");
#endif
  cblas_dgemv(CblasColMajor,CblasNoTrans, Mi_size, Me_size, 1.0, Mi1pseduInvMe1, Mi_size, Qe, 1, 1.0, Qi, 1);
#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  printDenseMatrice("newQi", titi, Qi, Mi_size, 1);
  fprintf(titi, "_newQi=Qi+_minusMi1pseduInvMe1*Qe;\n");
#endif
  cblas_dgemm(CblasColMajor,CblasNoTrans, CblasNoTrans, Mi_size, Mi_size, Me_size, 1.0, Mi1pseduInvMe1, Mi_size, Me2, Me_size, 1.0, reducedProb, Mi_size);
#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  printDenseMatrice("W", titi, reducedProb, Mi_size, Mi_size);
  fprintf(titi, "_W=Mi2+_minusMi1pseduInvMe1*Me2;\n");

#endif
  listNumericsProblem * curProblem = 0;
  GenericMechanicalProblem * _pnumerics_GMP = buildEmptyGenericMechanicalProblem();
  curProblem =  pInProblem->firstListElem;
  while (curProblem)
  {
    switch (curProblem->type)
    {
    case SICONOS_NUMERICS_PROBLEM_EQUALITY:
    {
      break;
    }
    case SICONOS_NUMERICS_PROBLEM_LCP:
    {
      addProblem(_pnumerics_GMP, curProblem->type, curProblem->size);
      break;
    }
    case SICONOS_NUMERICS_PROBLEM_FC3D:
    {
      FrictionContactProblem* pFC3D = (FrictionContactProblem*)addProblem(_pnumerics_GMP, curProblem->type, curProblem->size);
      *(pFC3D->mu) = *(((FrictionContactProblem*)curProblem->problem)->mu);
      break;
    }
    default:
      printf("GMPReduced  buildReducedGMP: problemType unknown: %d . \n", curProblem->type);
    }
    curProblem = curProblem->nextProblem;
  }
  NumericsMatrix numM;
  numM.storageType = 0;
  numM.matrix0 = reducedProb;
  numM.matrix1 = 0;
  numM.size0 = Mi_size;
  numM.size1 = Mi_size;
  _pnumerics_GMP->M = &numM;
  _pnumerics_GMP->q = Qi;
  double *Rreduced = (double *) malloc(Mi_size * sizeof(double));
  double *Vreduced = (double *) malloc(Mi_size * sizeof(double));
  genericMechanicalProblem_GS(_pnumerics_GMP, Rreduced, Vreduced, info, options, numerics_options);
#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  if (*info)
  {
    printf("\nGMPREduced failed!\n");
  }
  else
  {
    printf("\nGMPREduced succed!\n");
    printDenseMatrice("Ri", titi, Rreduced, Mi_size, 1);
    printDenseMatrice("Vi", titi, Vreduced, Mi_size, 1);
  }
#endif
  if (!*info)
  {
    /*Re computation*/
    double * Re = (double*)malloc(Me_size * sizeof(double));
    double * Rbuf = (double*)malloc(Me_size * sizeof(double));
    memcpy(Rbuf, Qe, Me_size * sizeof(double));
    cblas_dgemv(CblasColMajor,CblasNoTrans, Me_size, Mi_size, 1.0, Me2, Me_size, Rreduced, 1, 1.0, Rbuf, 1);
    cblas_dgemv(CblasColMajor,CblasNoTrans, Me_size, Me_size, -1.0, pseduInvMe1, Me_size, Rbuf, 1, 0.0, Re, 1);
  #ifdef GMP_DEBUG_GMPREDUCED_SOLVE
    fprintf(titi, "_Re=-Me1inv*(Me2*Ri+Qe);\n");
    printDenseMatrice("Re", titi, Re, Me_size, 1);
  #endif
    GMPReducedSolToSol(pInProblem, reaction, velocity, Re, Rreduced, Vreduced);
    double err;
    int tolViolate = GenericMechanical_compute_error(pInProblem, reaction, velocity, options->dparam[0], options, &err);
    if (tolViolate)
    {
      printf("GMPReduced, warnning, reduced problem solved, but error of intial probleme violated tol = %e, err= %e\n", options->dparam[0], err);
    }
    free(Re);
    free(Rbuf);
  }

#ifdef GMP_DEBUG_GMPREDUCED_SOLVE
  fclose(titi);
#endif
  free(Rreduced);
  free(Vreduced);
  freeGenericMechanicalProblem(_pnumerics_GMP, NUMERICS_GMP_FREE_GMP);
  free(Me);
  free(Mi);
  free(Qe);
  free(Qi);
  free(pseduInvMe1);
  free(reducedProb);
  free(Mi1pseduInvMe1);
  //  GenericMechanicalProblem GMPOutProblem;
  //  SparseBlockStructuredMatrix mOut;

}
int test_prodNumericsMatrix(NumericsMatrix** MM)
{
  NumericsMatrix* M1 =  MM[0];
  NumericsMatrix* M2 =  MM[1];
  NumericsMatrix* M3 =  MM[2];
  NumericsMatrix* M4 =  MM[3];




  printf("== Numerics tests: prodNumericsMatrix(NumericsMatrix,vector) == \n");
  int i , n = M1->size1, m = 4;

  double * x = (double *)malloc(n * sizeof(double));
  double * x2 = (double *)malloc(m * sizeof(double));
  double alpha = 2.3, beta = 1.9;
  double * yref = (double *)malloc(n * sizeof(double));
  double * yref2 = (double *)malloc(n * sizeof(double));;
  double * y = (double *)malloc(n * sizeof(double));
  double * y2 = (double *)malloc(n * sizeof(double));
  for (i = 0; i < n; i++)
  {
    x[i] = i + 1.0;
    yref[i] = 0.1 * i;
    yref2[i] = 0.1 * i;
    y[i] = yref[i];
    y2[i] = yref2[i];
  }
  x2[0] = 0;
  x2[1] = 0;
  x2[2] = 0;
  x2[3] = 0;
  int incx = 1, incy = 1;
  cblas_dgemv(CblasColMajor, CblasNoTrans, n, n, alpha, M1->matrix0, n, x, incx, beta, yref, incy);

  prodNumericsMatrix(n, n, alpha, M1, x, beta, y);
  double tol = 1e-12;
  int info = 0;
  for (i = 0; i < n; i++)
  {
    if (fabs(y[i] - yref[i]) > tol) info = 1;
    //    printf("%lf\n", fabs(y[i]-yref[i]));
  }
  if (info == 0)
    printf("Step 0 ( y = alpha*A*x + beta*y, double* storage) ok ...\n");
  else
    printf("Step 0 ( y = alpha*A*x + beta*y, double* storage) failed ...\n");


  cblas_dgemv(CblasColMajor, CblasNoTrans, n, m, alpha, M3->matrix0, n, x2, incx, beta, yref2, incy);

  prodNumericsMatrix(m, n, alpha, M3, x2, beta, y2);
  for (i = 0; i < n; i++)
  {
    if (fabs(y2[i] - yref2[i]) > tol) info = 1;
    /*           printf("%lf\n", fabs(y2[i]-yref2[i])); */
    /*            printf("%lf\n",y2[i]); */
    /*            printf("%lf\n",yref2[i]); */
  }
  if (info == 0)
    printf("Step 1 ( y = alpha*A*x + beta*y, double* storage, non square) ok ...\n");
  else
    printf("Step 1 ( y = alpha*A*x + beta*y, double* storage, non square) failed ...\n");






  /* Sparse ... */
  for (i = 0; i < n; i++)
  {
    y[i] = 0.1 * i;
    y2[i] = 0.1 * i;
  }
  prodNumericsMatrix(n, n, alpha, M2, x, beta, y);
  for (i = 0; i < n; i++)
  {
    if (fabs(y[i] - yref[i]) > tol) info = 1;
    /*       printf("%lf\n", fabs(y[i]-yref[i]));  */
    /*       printf("%lf\n", y[i]);  */
  }
  if (info == 0)
    printf("Step 2 ( y = alpha*A*x + beta*y, sparse storage) ok ...\n");
  else
    printf("Step 2 ( y = alpha*A*x + beta*y,  sparsestorage) failed ...\n");



  prodNumericsMatrix(m, n, alpha, M4, x2, beta, y2);
  for (i = 0; i < n; i++)
  {

    if (fabs(y2[i] - yref2[i]) > tol) info = 1;
    /*      printf("%lf\n", fabs(y2[i]-yref2[i]));  */
    /*       printf("%lf\n",y2[i]); */
    /*       printf("%lf\n",yref2[i]); */
  }

  if (info == 0)
    printf("Step 3 ( y = alpha*A*x + beta*y, sparse storage, non square) ok ...\n");
  else
    printf("Step 3 ( y = alpha*A*x + beta*y,  sparsestorage, non square) failed ...\n");



  free(x);
  free(x2);
  free(y);
  free(y2);
  free(yref);
  free(yref2);

  printf("== End of test prodNumericsMatrix(NumericsMatrix,vector), result = %d\n", info);

  return info;
}