/*
 * 最大最大激励化 第 unitdx 个单元
 *
 */
double LayerWiseRBMs::maximizeUnit(int layerIdx, int unitIdx, double * unitSample, double argvNorm, int epoch){

    int AMnumIn = layers[0]->numVis;                                            

    // unitsample 归一化
    double curNorm = squareNorm(unitSample, AMnumIn, 1);
    cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);
	
	double maxValue =0;

	for(int k=0; k<epoch; k++){
	// forward
		for(int i=0; i<=layerIdx; i++){
			if(i==0)
				layers[i]->setInput(unitSample);
			else
				layers[i]->setInput(layers[i-1]->getOutput());
			layers[i]->setBatchSize(1);
			layers[i]->runBatch();
		}
		maxValue = layers[layerIdx]->getOutput()[unitIdx];
	//back propagate
		for(int i=layerIdx; i>=0; i--){
			if(i==layerIdx)
				layers[i]->getAMDelta(unitIdx, NULL)	;
			else
				layers[i]->getAMDelta(-1, layers[i+1]->AMDelta);
		}
        double lr = 0.01 * cblas_dasum(AMnumIn, unitSample, 1) /                
                    cblas_dasum(AMnumIn, layers[0]->AMDelta, 1);
		
	// update unitSample
		cblas_daxpy(AMnumIn, lr, layers[0]->AMDelta, 1, unitSample, 1);
	//归一化 unitSample
		curNorm = squareNorm(unitSample, AMnumIn, 1);
	    cblas_dscal(AMnumIn, argvNorm / curNorm, unitSample, 1);
	
	}
	return maxValue;
}
Exemple #2
0
Result merge(Result* results) {
  Result r1 = results[0];
  Result r2 = results[1];

  if (r1.C + r1.n * r1.CM == r2.C) { // split n
    Result r = {r1.m, 2*r1.n, r1.CM, r1.C};
    return r;

  } else if (r1.C + r1.m == r2.C) { // split m
    Result r = {2*r1.m, r1.n, r1.CM, r1.C};
    return r;

  } else { // split k
    int x;
    for (x = 0; x < r1.n; x++) {
      cblas_daxpy(r2.m, 1, r2.C + r2.m * x, 1, r1.C + r1.CM * x, 1);
    }
    free(r2.C);
    Result r = {r1.m, r1.n, r1.CM, r1.C};
    return r;
  }
}
Exemple #3
0
// Calculates p
void cg_solver_calc_p(
        const int x,
        const int y,
        const int z,
        const int halo_depth,
        const double beta,
        double* vec_p,
        double* vec_r)
{
    int x_inner = x - 2*halo_depth;

#pragma omp parallel for
    for(int ii = halo_depth; ii < z-halo_depth; ++ii)
    {
        for(int jj = halo_depth; jj < y-halo_depth; ++jj)
        {
            const int offset = ii*x*y + jj*x + halo_depth;
            cblas_dscal(x_inner, beta, vec_p + offset, 1);
            cblas_daxpy(x_inner, 1.0, vec_r + offset, 1, vec_p + offset, 1);
        }
    }
}
Exemple #4
0
int main()
{
    int n = 10;
    int in_x =1;
    int in_y =1;

    std::vector<double> x(n);
    std::vector<double> y(n);

    double alpha = 10;

    std::fill(x.begin(),x.end(),1.0);
    std::fill(y.begin(),y.end(),2.0);

    cblas_daxpy( n, alpha, &x[0], in_x, &y[0], in_y);

    //Print y 
    for(int j=0;j<n;j++)
        std::cout << y[j] << "\t";

    std::cout << std::endl;
}
Exemple #5
0
    void Map::process(const double* inputs, double* outputs)
    {
		if(m_first_source > -1)
        {
            m_encoders[m_first_source]->process(inputs[m_first_source] * m_gains[m_first_source], m_harmonics_double);
            m_widers[m_first_source]->process(m_harmonics_double, outputs);
			
            for(unsigned int i = m_first_source+1; i < m_number_of_sources; i++)
            {
                if (!m_muted[i])
                {
                    m_encoders[i]->process(inputs[i] * m_gains[i], m_harmonics_double);
                    m_widers[i]->process(m_harmonics_double, m_harmonics_double);
                    cblas_daxpy(m_number_of_harmonics, 1.f, m_harmonics_double, 1, outputs, 1);
                }
            }
        }
        else
        {
            for(unsigned int i = 0; i < m_number_of_harmonics; i++)
                outputs[i] = 0.;
        }
    }
int main () {
  typedef boost::multi_array<double, 1> vector;
  typedef vector::index vector_index;

  int N = 100000000;

  vector x(boost::extents[N]);
  vector y(boost::extents[N]);
  vector a(boost::extents[N]);

#pragma omp parallel for
  for (vector_index i = 0; i < N; i++) {
    x[i] = 1;
    y[i] = 1;
    a[i] = y[i];
  }

  cblas_daxpy(N, 1.0, &x[0], 1, &a[0], 1);

  std::cout << a[0] << '\n';

  return 0;
}
Exemple #7
0
JNIEXPORT void JNICALL Java_edu_berkeley_bid_CBLAS_dmcsrm 
(JNIEnv * env, jobject calling_obj, jint M, jint N, jdoubleArray j_A, jint lda, 
 jdoubleArray j_B, jintArray j_ir, jintArray j_jc, jdoubleArray j_C, jint ldc){
	jdouble * A = (*env)->GetPrimitiveArrayCritical(env, j_A, JNI_FALSE);
	jdouble * B = (*env)->GetPrimitiveArrayCritical(env, j_B, JNI_FALSE);
	jint * ir = (*env)->GetPrimitiveArrayCritical(env, j_ir, JNI_FALSE);
	jint * jc = (*env)->GetPrimitiveArrayCritical(env, j_jc, JNI_FALSE);
	jdouble * C = (*env)->GetPrimitiveArrayCritical(env, j_C, JNI_FALSE);

    int ioff = jc[0];
    int i, j, k;
    for (i = 0; i < N; i++) {
      for (j = jc[i]-ioff; j < jc[i+1]-ioff; j++) {
        k = ir[j]-ioff;
        cblas_daxpy(M, B[j], A+(i*lda), 1, C+(k*ldc), 1);
      }
    }

	(*env)->ReleasePrimitiveArrayCritical(env, j_C, C, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, j_jc, jc, 0);	
    (*env)->ReleasePrimitiveArrayCritical(env, j_ir, ir, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, j_B, B, 0);
	(*env)->ReleasePrimitiveArrayCritical(env, j_A, A, 0);
}
Exemple #8
0
CPS_START_NAMESPACE
//--------------------------------------------------------------------
//  CVS keywords
//
//  $Source: /home/chulwoo/CPS/repo/CVS/cps_only/cps_pp/src/util/dirac_op/d_op_mobius/noarch/mobius_m.C,v $
//  $State: Exp $
//
//--------------------------------------------------------------------
//------------------------------------------------------------------
// mobius_m.C
//
// mobius_m is the fermion matrix.
// The in, out fields are defined on the checkerboard lattice
//
//------------------------------------------------------------------

CPS_END_NAMESPACE
#include<util/dwf.h>
#include<util/mobius.h>
#include<util/gjp.h>
#include<util/vector.h>
#include<util/verbose.h>
#include<util/error.h>
#include<util/dirac_op.h>
#include<util/time_cps.h>

#include "blas-subs.h"

CPS_START_NAMESPACE


//4d precond. mobius Dirac op:
// M_5 - kappa_b^2 M4eo M_5^-1 M4oe
void  mobius_m(Vector *out,
               Matrix *gauge_field,
               Vector *in,
               Float mass,
               Dwf *mobius_lib_arg)
{

    //------------------------------------------------------------------
    // Initializations
    //------------------------------------------------------------------
    const int f_size = 24 * mobius_lib_arg->vol_4d * mobius_lib_arg->ls / 2;
    const Float kappa_ratio = mobius_lib_arg->mobius_kappa_b/mobius_lib_arg->mobius_kappa_c;
    const Float minus_kappa_b_sq = -mobius_lib_arg->mobius_kappa_b * mobius_lib_arg->mobius_kappa_b;
    Vector  *frm_tmp2 = (Vector *) mobius_lib_arg->frm_tmp2;
    //Vector *temp = (Vector *) smalloc(f_size * sizeof(Float));
    Float norm;


    //  out = [ 1 + kappa_b/kappa_c 1/2 dslash_5  - kappa_b^2 Meo M5inv Moe] in
    // (dslash_5 uses (1+-g5), not P_R,L, i.e. no factor of 1/2 which is here out front)
    //    1. ftmp2 = Meo M5inv Moe in
    //    2. out <-  in
    //    3. out += -kappa_b^2 ftmp2
    //    4. out +=  -kappa_b/kappa_c /2 dslash_5 in
    //         (done by the dslash_5 with a5_inv = -kappa_b/kappa_c/2 *GJP.MobiusA5Inv() )


    //--------------------------------------------------------------
    //    1. ftmp2 = Meo M5inv Moe in
    //--------------------------------------------------------------
    // Apply Dslash O <- E
    //------------------------------------------------------------------
    time_elapse();
    mobius_dslash_4(out, gauge_field, in, 0, 0, mobius_lib_arg, mass);
    DEBUG_MOBIUS_DSLASH("mobius_dslash_4 %e\n", time_elapse());

    //------------------------------------------------------------------
    // Apply M_5^-1 (hopping in 5th dir + diagonal)
    //------------------------------------------------------------------
    mobius_m5inv(out, mass, 0, mobius_lib_arg);
    DEBUG_MOBIUS_DSLASH("mobius_m5inv %e\n", time_elapse());

    //------------------------------------------------------------------
    // Apply Dslash E <- O
    //------------------------------------------------------------------
    mobius_dslash_4(frm_tmp2, gauge_field, out, 1, 0, mobius_lib_arg, mass);
    DEBUG_MOBIUS_DSLASH("mobius_dslash_4 %e\n", time_elapse());

    //------------------------------------------------------------------
    //    2. out <-  in
    //------------------------------------------------------------------
#ifndef USE_BLAS
    moveFloat((IFloat*)out, (IFloat*)in, f_size);
#else
    cblas_dcopy(f_size, (IFloat*)in, 1, (IFloat*)out, 1);
#endif
    DEBUG_MOBIUS_DSLASH("out <- in %e\n", time_elapse());

    //------------------------------------------------------------------
    //    3. out += -kap2 ftmp2
    //------------------------------------------------------------------
#ifndef USE_BLAS
    fTimesV1PlusV2((IFloat*)out, minus_kappa_b_sq, (IFloat*)frm_tmp2,
                   (IFloat *)out, f_size);
#else

    cblas_daxpy(f_size, minus_kappa_b_sq, (IFloat*)frm_tmp2,1,
                (IFloat *)out,  1);
#endif
    DEBUG_MOBIUS_DSLASH("mobius out+= kap2 %e\n", time_elapse());

    //------------------------------------------------------------------
    //    4. out +=  kappa_b/kappa_c dslash_5 in
    //------------------------------------------------------------------
    mobius_kappa_dslash_5_plus(out, in, mass, 0, mobius_lib_arg, kappa_ratio);
    DEBUG_MOBIUS_DSLASH("mobius_kappa_dslash_5_plus %e\n", time_elapse());

    // Flops count in this function is two AXPY = 4 flops per vector elements
    //DiracOp::CGflops +=  3*f_size;

}
void plotMerit(double *z, double psi_k, double descentCondition)
{
  int incx = 1, incy = 1;
  double q_0, q_tk, qp_tk, merit_k;
  /* double tmin = 1e-12; */
  double tk = 1, aux;
  double m1 = 1e-4;
  double Nstep = 0;
  int i = 0;

  FILE *fp;

  (*sFphi)(sN, z, sphi_z, 0);
  aux = cblas_dnrm2(sN, sphi_z, 1);
  /* Computes merit function */
  aux = 0.5 * aux * aux;
  printf("plot psi_z %e\n", aux);


  if (!sPlotMerit)
    return;

  if (sPlotMerit)
  {
    /*    sPlotMerit=0;*/
    strcpy(fileName, "outputLS");


    (*sFphi)(sN, z, sphi_z, 0);
    q_0 =  cblas_dnrm2(sN, sphi_z , incx);
    q_0 = 0.5 * q_0 * q_0;

    fp = fopen(fileName, "w");

    /*    sPlotMerit=0;*/
    tk = 5e-7;
    aux = -tk;
    Nstep = 1e4;
    for (i = 0; i < 2 * Nstep; i++)
    {
      cblas_dcopy(sN, z, incx, sz2, incx);
      cblas_daxpy(sN , aux , sdir_descent , incx , sz2 , incy);
      (*sFphi)(sN, sz2, sphi_z, 0);
      q_tk =  cblas_dnrm2(sN, sphi_z , incx);
      q_tk = 0.5 * q_tk * q_tk;


      (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
      /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
      cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx);
      qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1);

      merit_k = psi_k + m1 * aux * descentCondition;


      fprintf(fp, "%e %.16e %.16e %e\n", aux, q_tk, merit_k, qp_tk);
      if (i == Nstep - 1)
        aux = 0;
      else
        aux += tk / Nstep;
    }

    fclose(fp);
  }
}
int lineSearch_Wolfe(double *z, double qp_0)
{
  int incx = 1, incy = 1;
  double q_0, q_tk, qp_tk;
  double tmin = 1e-12;
  int maxiter = 100;
  int niter = 0;
  double tk = 1;
  double tg, td;
  double m1 = 0.1;
  double m2 = 0.9;


  (*sFphi)(sN, z, sphi_z, 0);
  q_0 =  cblas_dnrm2(sN, sphi_z , incx);
  q_0 = 0.5 * q_0 * q_0;

  tg = 0;
  td = 10e5;

  tk = (tg + td) / 2.0;

  while (niter < maxiter || (td - tg) < tmin)
  {
    niter++;
    /*q_tk = 0.5*|| phi(z+tk*d) ||*/
    cblas_dcopy(sN, z, incx, sz2, incx);
    cblas_daxpy(sN , tk , sdir_descent , incx , sz2 , incy);
    (*sFphi)(sN, sz2, sphi_z, 0);
    q_tk =  cblas_dnrm2(sN, sphi_z , incx);
    q_tk = 0.5 * q_tk * q_tk;

    (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
    /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
    cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx);
    qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1);
    if (qp_tk <  m2 * qp_0 && q_tk < q_0 + m1 * tk * qp_0)
    {
      /*too small*/
      if (niter == 1)
        break;
      tg = tk;
      tk = (tg + td) / 2.0;
      continue;
    }
    else if (q_tk > q_0 + m1 * tk * qp_0)
    {
      /*too big*/
      td = tk;
      tk = (tg + td) / 2.0;
      continue;
    }
    else
      break;
  }

  cblas_dcopy(sN, sz2, incx, z, incx);

  if ((td - tg) <= tmin)
  {
    printf("NonSmoothNewton2::lineSearchWolfe warning, resulting tk < tmin, linesearch stopped.\n");
    return 0;
  }
  return 1;

}
void caffe_axpy<double>(const int N, const double alpha, const double* X,
                        double* Y) {
    cblas_daxpy(N, alpha, X, 1, Y, 1);
}
void eblas_daxpy_sub(size_t iStart, size_t iStop, double a, const double* x, int incx, double* y, int incy)
{	cblas_daxpy(iStop-iStart, a, x+incx*iStart, incx, y+incy*iStart, incy);
}
Exemple #13
0
void lcp_latin(LinearComplementarityProblem* problem, double *z, double *w, int *info , SolverOptions* options)
{
  /* matrix M of the lcp */
  double * M = problem->M->matrix0;

  /* size of the LCP */
  int n = problem->size;
  int n2 = n * n;


  int i, j,  iter1, nrhs;
  int info2 = 0;
  int itt, it_end;
  int incx, incy;
  int itermax = options->iparam[0];
  double tol = options->dparam[0];
  double k_latin = options->dparam[2];
  double alpha, beta;
  double err1;
  double res, errmax;
  double  *wc, *zc, *kinvden1, *kinvden2, *wt;
  double *maxwt, *wnum1, *znum1, *ww, *zz;
  double *num1, *kinvnum1, *den1, *den2, *wden1, *zden1;
  double  *kinvwden1, *kzden1;
  double  *k, *kinv, *DPO;

  // char trans='T', notrans='N', uplo='U', diag='N';
  incx = 1;
  incy = 1;

  /* Recup input */


  errmax = tol;
  itt = itermax;


  /* Initialize output */

  options->iparam[1] = 0;
  options->dparam[1] = 0.0;

  /* Allocations */

  ww        = (double*) malloc(n * sizeof(double));
  zz        = (double*) malloc(n * sizeof(double));
  wc        = (double*) malloc(n * sizeof(double));
  zc        = (double*) malloc(n * sizeof(double));
  znum1     = (double*) malloc(n * sizeof(double));
  wnum1     = (double*) malloc(n * sizeof(double));
  kinvden1  = (double*) malloc(n * sizeof(double));
  kinvden2  = (double*) malloc(n * sizeof(double));
  wt        = (double*) malloc(n * sizeof(double));
  maxwt     = (double*) malloc(n * sizeof(double));
  num1      = (double*) malloc(n * sizeof(double));
  kinvnum1  = (double*) malloc(n * sizeof(double));
  den1      = (double*) malloc(n * sizeof(double));
  den2      = (double*) malloc(n * sizeof(double));
  wden1     = (double*) malloc(n * sizeof(double));
  zden1     = (double*) malloc(n * sizeof(double));
  kinvwden1 = (double*) malloc(n * sizeof(double));
  kzden1    = (double*) malloc(n * sizeof(double));
  DPO       = (double*) malloc(n2 * sizeof(double));
  k         = (double*) malloc(n2 * sizeof(double));
  kinv      = (double*) malloc(n2 * sizeof(double));



  /* Initialization */

  for (i = 0; i < n2; i++)
  {


    if (i < n)
    {

      wc[i]       = 0.0;
      zc[i]       = 0.0;
      z[i]        = 0.0;
      w[i]        = 0.0;
      znum1[i]    = 0.0;
      wnum1[i]    = 0.0;
      kinvden1[i] = 0.0;
      kinvden2[i] = 0.0;
      wt[i]       = 0.0;
      maxwt[i]    = 0.0;
      num1[i]     = 0.0;
      kinvnum1[i] = 0.0;
      den1[i]     = 0.0;
      den2[i]     = 0.0;
    }

    k[i]          = 0.0;
    kinv[i]       = 0.0;
    DPO[i]        = 0.0;


  }





  for (i = 0 ; i < n ; i++)
  {

    k[i * n + i] =  k_latin * M[i * n + i];

    if (fabs(k[i * n + i]) < DBL_EPSILON)
    {

      if (verbose > 0)
      {
        printf(" Warning nul diagonal term in k matrix \n");
      }

      free(ww);
      free(zz);
      free(wc);
      free(zc);
      free(znum1);
      free(wnum1);
      free(kinvden1);
      free(kinvden2);
      free(wt);
      free(maxwt);
      free(num1);
      free(kinvnum1);
      free(den1);
      free(den2);
      free(wden1);
      free(zden1);
      free(kinvwden1);
      free(kzden1);
      free(DPO);
      free(k);
      free(kinv);

      *info = 3;

      return;

    }
    else

      kinv[i + n * i] = 1.0 / k[i + n * i];

  }



  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      DPO[i + n * j] = M[j * n + i] + k[i + n * j];







  /*            Cholesky              */


  DPOTRF(LA_UP, n, DPO , n, &info2);


  if (info2 != 0)
  {
    printf(" Matter with Cholesky Factorization \n ");

    free(ww);
    free(zz);
    free(wc);
    free(zc);
    free(znum1);
    free(wnum1);
    free(kinvden1);
    free(kinvden2);
    free(wt);
    free(maxwt);
    free(num1);
    free(kinvnum1);
    free(den1);
    free(den2);
    free(wden1);
    free(zden1);
    free(kinvwden1);
    free(kzden1);
    free(DPO);
    free(k);
    free(kinv);

    *info = 2;
    return;
  }

  /*            End of Cholesky          */




  /*            Iteration loops  */

  iter1 = 0;
  err1 = 1.;


  while ((iter1 < itt) && (err1 > errmax))
  {

    /*       Linear stage (zc,wc) -> (z,w)*/


    alpha = 1.;
    beta  = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, zc, incx, beta, wc, incy);


    cblas_dcopy(n, problem->q, incx, znum1, incy);


    alpha = -1.;
    cblas_dscal(n , alpha , znum1 , incx);

    alpha = 1.;
    cblas_daxpy(n, alpha, wc, incx, znum1, incy);
    nrhs = 1;
    DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info2);
    DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info2);

    cblas_dcopy(n, znum1, incx, z, incy);



    alpha = -1.;
    beta = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, z, incx, beta, wc, incy);

    cblas_dcopy(n, wc, incx, w, incy);


    /*         Local Stage                  */

    cblas_dcopy(n, w, incx, wt, incy);
    alpha = -1.;
    beta = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, z, incx, beta, wt, incy);

    for (i = 0; i < n; i++)
    {
      if (wt[i] > 0.0)
      {
        wc[i] = wt[i];
        zc[i] = 0.0;
      }
      else
      {
        wc[i] = 0.0;
        zc[i] =  -wt[i] / k[i + n * i];
      }
    }



    /*        Convergence criterium                */


    cblas_dcopy(n, w, incx, wnum1, incy);
    alpha = -1.;
    cblas_daxpy(n, alpha, wc, incx, wnum1, incy);


    cblas_dcopy(n, z, incx, znum1, incy);
    cblas_daxpy(n, alpha, zc, incx, znum1, incy);



    alpha = 1.;
    beta = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, znum1, incx, beta, wnum1, incy);


    /*   wnum1(:) =(w(:)-wc(:))+matmul( k(:,:),(z(:)-zc(:)))   */



    alpha = 1.;
    beta = 0.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, kinv, n, wnum1, incx, beta, kinvnum1, incy);



    cblas_dcopy(n, z, incx, zz, incy);
    cblas_dcopy(n, w, incx, ww, incy);

    alpha = 1.;
    cblas_daxpy(n, alpha, wc, incx, ww, incy);

    cblas_daxpy(n, alpha, zc, incx, zz, incy);

    beta = 0.;
    alpha = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, k, n, zz, incx, beta, kzden1, incy);


    beta = 0.;
    alpha = 1.;
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, alpha, kinv, n, ww, incx, beta, kinvwden1, incy);





    lcp_compute_error_only(n, z, w, &err1);

    it_end = iter1;
    res    = err1;

    iter1  = iter1 + 1;

    options->iparam[1] = it_end;
    options->dparam[1] = res;

  }



  if (isnan(err1) || (err1 > errmax))
  {
    if (verbose > 0) printf("No convergence of LATIN after %d iterations, the residue is %g\n", iter1, err1);
    *info = 1;
  }
  else
  {
    if (verbose > 0) printf("Convergence of LATIN after %d iterations, the residue is %g \n", iter1, err1);
    *info = 0;
  }




  free(wc);

  free(DPO);
  free(k);
  free(kinv);

  free(zz);
  free(ww);
  free(zc);
  free(znum1);
  free(wnum1);
  free(kinvden1);
  free(kinvden2);
  free(wt);
  free(maxwt);
  free(num1);
  free(kinvnum1);
  free(den1);
  free(den2);
  free(wden1);
  free(zden1);
  free(kinvwden1);
  free(kzden1);




}
Exemple #14
0
void get_top_delta(const da *m, const double *y, 
                   const double *x, double *d, const int batch_size){
    cblas_dcopy(batch_size * m->n_in, y, 1, d, 1);
    cblas_daxpy(batch_size * m->n_in, -1,
                x, 1, d, 1);
}
Exemple #15
0
void train_da(da *m, dataset_blas *train_set, dataset_blas *expected_set, 
              int mini_batch, int n_epcho, char* weight_filename){
    int i, j, k, p, q;
    int epcho;
    double cost, total_cost;
    time_t start_time, end_time;
    FILE *weight_file;

    //weight_file = fopen(weight_filename, "w");

    for(epcho = 0; epcho < n_epcho; epcho++){

        total_cost = 0.0;
        start_time = time(NULL);
        for(k = 0; k < train_set->N / mini_batch; k++){

            if((k+1) % 500 == 0){
                printf("epcho %d batch %d\n", epcho + 1, k + 1);
            }
            get_hidden_values(m, train_set->input + k * mini_batch * m->n_in, h_out, mini_batch);
            get_reconstruct_input(m, h_out, z_out, mini_batch);
            
            get_top_delta(m, z_out, expected_set->input + k * mini_batch * m->n_in, d_high, mini_batch);
            get_second_delta(m, h_out, d_high, d_low, mini_batch);

            /* modify weight matrix W */
            cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                        m->n_out, m->n_in, mini_batch,
                        1, d_low, m->n_out,
                        train_set->input + k * mini_batch * m->n_in, m->n_in,
                        0, tr1, m->n_in);

            cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                        m->n_out, m->n_in, mini_batch,
                        1, h_out, m->n_out,
                        d_high, m->n_in, 0, tr2, m->n_in);

            cblas_daxpy(m->n_out * m->n_in, 1, tr1, 1, tr2, 1);
            
            cblas_daxpy(m->n_out * m->n_in, -eta / mini_batch, tr2, 1, m->W, 1);

            /* modify bias vector */
            cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                        m->n_out, 1, mini_batch,
                        1, d_low, m->n_out,
                        Ivec, 1, 0, tr1, 1);

            cblas_daxpy(m->n_out, -eta / mini_batch, tr1, 1, m->b, 1);
            
            cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                        m->n_in, 1, mini_batch,
                        1, d_high, m->n_in,
                        Ivec, 1, 0, tr1, 1);

            cblas_daxpy(m->n_in, -eta / mini_batch, tr1, 1, m->c, 1);

            for(i = 0; i < mini_batch * m->n_in; i++){
                tr1[i] = log(z_out[i]);
            }
            total_cost -= cblas_ddot(mini_batch * m->n_in, expected_set->input + k * mini_batch * m->n_in, 1,
                                     tr1, 1) / mini_batch;
            for(i = 0; i < mini_batch * m->n_in; i++){
                tr1[i] = log(1.0 - z_out[i]);
            }
            cblas_dcopy(mini_batch * m->n_in, Ivec, 1, tr2, 1);
            cblas_daxpy(mini_batch * m->n_in, -1, expected_set->input + k * mini_batch * m->n_in,
                        1, tr2, 1);
            total_cost -= cblas_ddot(mini_batch * m->n_in, tr1, 1, tr2, 1) / mini_batch;

        }
        end_time = time(NULL);
        printf("epcho %d cost: %.5lf\ttime: %ds\n", epcho + 1, total_cost / train_set->N * mini_batch, (int)(end_time - start_time));
    }

    //fclose(weight_file);
}
void bi_conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol){
   
    int i,j,iter;
     
    double rho,rho1,alpha,beta,omega;
     
    double r[n], r_t[n];
    double z[n], z_t[n];
    double q[n], q_t[n], temp_q[n];
    double p[n], p_t[n], temp_p[n];
    double res[n];                  //NA VGEI!
    double precond[n];
     
    //Initializations      
    memset(precond, 0, n*sizeof(double));
    memset(r, 0, n*sizeof(double));
    memset(r_t, 0, n*sizeof(double));
    memset(z, 0, n*sizeof(double));
    memset(z_t, 0, n*sizeof(double));
    memset(q, 0, n*sizeof(double));
    memset(q_t, 0, n*sizeof(double));
    memset(temp_q, 0, n*sizeof(double));
    memset(p, 0, n*sizeof(double));
    memset(p_t, 0, n*sizeof(double));
    memset(temp_p, 0, n*sizeof(double));
    memset(res, 0, n*sizeof(double));
     
    /* Preconditioner */
    double max;
    int pp;
    for(j = 0; j < n; ++j){
        for(pp = A->p[j], max = fabs(A->x[pp]); pp < A->p[j+1]; pp++)
            if(fabs(A->x[pp]) > max)                  //vriskei to diagonio stoixeio
                max = fabs(A->x[pp]);
        precond[j] = 1/max;    
    }  
    cs *AT = cs_transpose (A, 1) ;
 
    cblas_dcopy (n, x, 1, res, 1);
 
    //r=b-Ax
    cblas_dcopy (n, b, 1, r, 1);
    memset(p, 0, n*sizeof(double));
    cs_gaxpy (A, x, p);
    for(i=0;i<n;i++){
        r[i]=r[i]-p[i];
     
    }
     
    cblas_dcopy (n, r, 1, r_t, 1);
     
    double r_norm = cblas_dnrm2 (n, r, 1);
    double b_norm = cblas_dnrm2 (n, b, 1);
    if(!b_norm)
        b_norm = 1;
 
    iter = 0;  
   
    while( r_norm/b_norm > itol && iter < n ){
       
        iter++;
 
        cblas_dcopy (n, r, 1, z, 1);            //gia na min allaksei o r
        cblas_dcopy (n, r_t, 1, z_t, 1);        //gia na min allaksei o r_t
        for(i=0;i<n;i++){
            z[i]=precond[i]*z[i];
            z_t[i]=precond[i]*z_t[i];
        }
     
        rho = cblas_ddot (n, z, 1, r_t, 1);    
        if (fpclassify(fabs(rho)) == FP_ZERO){
            printf("RHO aborting Bi-CG due to EPS...\n");
            exit(42);
        }
         
        if (iter == 1){
            cblas_dcopy (n, z, 1, p, 1);
            cblas_dcopy (n, z_t, 1, p_t, 1);
        }
        else{      
            //p = z + beta*p;
            beta = rho/rho1;           
 
            cblas_dscal (n, beta, p, 1);        //rescale p by beta
            cblas_dscal (n, beta, p_t, 1);      //rescale p_t by beta
         
            cblas_daxpy (n, 1, z, 1, p, 1);     //p = 1*z + p
            cblas_daxpy (n, 1, z_t, 1, p_t, 1); //p_t = 1*z_t + p_t
        }
         
        rho1 = rho;
         
        //q = Ap
        //q_t = trans(A)*p_t
        memset(q, 0, n*sizeof(double));
        cs_gaxpy (A, p, q);
        memset(q_t, 0, n*sizeof(double));
        cs_gaxpy(AT, p_t, q_t);        
         
        omega = cblas_ddot (n, p_t, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO){
            printf("OMEGA aborting Bi-CG due to EPS...\n");
            exit(42);
        }
 
        alpha = rho/omega;     
 
        //x = x + aplha*p;
        cblas_dcopy (n, p, 1, temp_p, 1);
        cblas_dscal (n, alpha, temp_p, 1);//rescale by aplha
        cblas_daxpy (n, 1, temp_p, 1, res, 1);// sum x = 1*x + temp_p
 
        //R = R - aplha*Q;
        cblas_dcopy (n, q, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);//rescale by -aplha
        cblas_daxpy (n, 1, temp_q, 1, r, 1);// sum r = 1*r - temp_p    
 
        //~r=~r-alpha*~q
        cblas_dcopy (n, q_t, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);//rescale by -aplha
        cblas_daxpy (n, 1, temp_q, 1, r_t, 1);// sum r = 1*r - temp_p
 
        r_norm = cblas_dnrm2 (n, r, 1); //next step
    }
    cblas_dcopy (n, res, 1, x, 1);
 
    cs_spfree(AT);
}
Exemple #17
0
/* Ref: Weiss, Algorithm 11 CGS
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
cgs (int n, const double *b, double *x,
     void (*atimes) (int, const double *, double *, void *),
     void *atimes_param,
     struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_m1 = -1.0;
  double d_2 = 2.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *r0 = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *u  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *q  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "cgs");
  CHECK_MALLOC (r0, "cgs");
  CHECK_MALLOC (p,  "cgs");
  CHECK_MALLOC (u,  "cgs");
  CHECK_MALLOC (ap, "cgs");
  CHECK_MALLOC (q,  "cgs");
  CHECK_MALLOC (t,  "cgs");


  double r0ap;
  double rho, rho1;
  double delta;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  cblas_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  cblas_dcopy (n, r, 1, r0, 1); // r0* = r
  cblas_dcopy (n, r, 1, p, 1); // p = r
  cblas_dcopy (n, r, 1, u, 1); // u = r

  rho = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = cblas_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      cblas_dcopy (n, u, 1, q, 1); // q = u
      cblas_dscal (n, 2.0, q, 1); // q = 2 u
      cblas_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      cblas_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      cblas_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      cblas_dcopy (n, q, 1, qu, 1); // qu = q
      cblas_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      cblas_dcopy (n, r, 1, u, 1); // u = r
      cblas_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      cblas_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      cblas_dcopy (n, u, 1, p, 1); // p = u
      cblas_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); // r = A.x - b

  dcopy_ (&n, r, &i_1, r0, &i_1); // r0* = r
  dcopy_ (&n, r, &i_1, p, &i_1); // p = r
  dcopy_ (&n, r, &i_1, u, &i_1); // u = r

  rho = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = ddot_ (&n, r0, &i_1, ap, &i_1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      dcopy_ (&n, u, &i_1, q, &i_1); // q = u
      dscal_ (&n, &d_2, q, &i_1); // q = 2 u
      daxpy_ (&n, &delta, ap, &i_1, q, &i_1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      daxpy_ (&n, &delta, t, &i_1, r, &i_1); // r = r + delta t
      daxpy_ (&n, &delta, q, &i_1, x, &i_1); // x = x + delta q

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = ddot_ (&n, r0, &i_1, r, &i_1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      dcopy_ (&n, q, &i_1, qu, &i_1); // qu = q
      daxpy_ (&n, &d_m1, u, &i_1, qu, &i_1); // qu = q - u
      dcopy_ (&n, r, &i_1, u, &i_1); // u = r
      daxpy_ (&n, &beta, qu, &i_1, u, &i_1); // u = r + beta (q - u)

      daxpy_ (&n, &beta, p, &i_1, qu, &i_1); // qu = q - u + beta * p
      dcopy_ (&n, u, &i_1, p, &i_1); // p = u
      daxpy_ (&n, &beta, qu, &i_1, p, &i_1); // p = u + beta (q - u + b * p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  // initial residue
  atimes (n, x, r, atimes_param); // r = A.x
  my_daxpy (n, -1.0, b, 1, r, 1); // r = A.x - b

  my_dcopy (n, r, 1, r0, 1); // r0* = r
  my_dcopy (n, r, 1, p, 1); // p = r
  my_dcopy (n, r, 1, u, 1); // u = r

  rho = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      r0ap = my_ddot (n, r0, 1, ap, 1); // r0ap = (r0*, A.p)
      delta = - rho / r0ap;

      my_dcopy (n, u, 1, q, 1); // q = u
      my_dscal (n, 2.0, q, 1); // q = 2 u
      my_daxpy (n, delta, ap, 1, q, 1); // q = 2 u + delta A.p

      atimes (n, q, t, atimes_param); // t = A.q

      my_daxpy (n, delta, t, 1, r, 1); // r = r + delta t
      my_daxpy (n, delta, q, 1, x, 1); // x = x + delta q

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out, "libiter-cgs %d %e\n", i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, r0, 1, r, 1); // rho = (r0*, r)
      beta = rho1 / rho;
      rho = rho1;

      // here t is not used so that this is used for working area.
      double *qu = t;
      my_dcopy (n, q, 1, qu, 1); // qu = q
      my_daxpy (n, -1.0, u, 1, qu, 1); // qu = q - u
      my_dcopy (n, r, 1, u, 1); // u = r
      my_daxpy (n, beta, qu, 1, u, 1); // u = r + beta (q - u)

      my_daxpy (n, beta, p, 1, qu, 1); // qu = q - u + beta * p
      my_dcopy (n, u, 1, p, 1); // p = u
      my_daxpy (n, beta, qu, 1, p, 1); // p = u + beta (q - u + b * p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (r0);
  free (p);
  free (u);
  free (ap);
  free (q);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-cgs it= %d res^2= %e\n", i, res2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
Exemple #18
0
void caffe_cpu_xpasv<double>(const int M, const int N, const double alpha,
    double* X, const double* a, const double* b) {
  for (int i = 0; i < M; ++i) {
    cblas_daxpy(N, alpha * a[i], b, 1, X + i * N, 1);
  }
}
void FrictionContact2D_latin(FrictionContactProblem* problem , double *reaction , double *velocity , int *info, SolverOptions* options)
{
  int nc = problem->numberOfContacts;
  assert(nc>0);
  double * vec = problem->M->matrix0;
  double *qq = problem->q;
  double * mu = problem->mu;



  int info77 = 0;
  int i, j, kk, iter1, ino, ddl, nrhs;
  int info2 = 0;
  int n = 2 * nc;
  size_t idim, nbno;
  int incx = 1, incy = 1;
  size_t taille, taillet, taillen, itt;
  int *ddln;
  int *ddlt, *vectnt;
  assert(n>0);

  double  errmax, alpha, beta, maxa, k_latin;
  double  aa, nt, wn, tc, zc0;
  double  err1, num11, err0;
  double  den11, den22, knz0, ktz0, *ktz, *wf;
  double  *wc, *zc, *wt, *maxwt, *wnum1, *znum1;
  double  *zt, *maxzt;

  double  *kn, *kt;

  // char    trans='T', diag='N';
  // char    uplo='U', notrans='N';



  double  *k, *DPO, *kf, *kninv;
  double  *kinvwden1, *kzden1, *kfinv, *knz, *wtnc;



  /*                Recup input                    */


  itt     = options->iparam[0];
  errmax  = options->dparam[0];
  k_latin = options->dparam[2];

  /*               Initialize output                */


  options->iparam[1] = 0;
  options->dparam[1] = 0.0;


  /*               Allocations                      */

  k         = (double*) malloc(n * n * sizeof(double));
  DPO       = (double*) malloc(n * n * sizeof(double));
  kf        = (double*) malloc(n * n * sizeof(double));
  kfinv     = (double*) malloc(n * n * sizeof(double));

  kninv     = (double*) malloc(nc * nc * sizeof(double));
  kn        = (double*) malloc(nc * nc * sizeof(double));
  kt        = (double*) malloc(nc * nc * sizeof(double));

  kinvwden1 = (double*) malloc(n  * sizeof(double));
  kzden1    = (double*) malloc(n  * sizeof(double));
  wc        = (double*) malloc(n  * sizeof(double));
  zc        = (double*) malloc(n  * sizeof(double));
  znum1     = (double*) malloc(n  * sizeof(double));
  wnum1     = (double*) malloc(n  * sizeof(double));
  wt        = (double*) malloc(n  * sizeof(double));
  maxzt     = (double*) malloc(n  * sizeof(double));



  knz       = (double*) malloc(nc * sizeof(double));
  wtnc      = (double*) malloc(nc * sizeof(double));
  ktz       = (double*) malloc(nc * sizeof(double));
  wf        = (double*) malloc(nc * sizeof(double));
  maxwt     = (double*) malloc(nc * sizeof(double));
  zt        = (double*) malloc(nc * sizeof(double));


  vectnt    = (int*) malloc(n * sizeof(int));

  ddln      = (int*) malloc(nc * sizeof(int));
  ddlt      = (int*) malloc(nc * sizeof(int));

  /*                    Initialization                   */



  for (i = 0; i < n * n; i++)
  {
    k[i]     = 0.;
    kf[i]    = 0.;
    kfinv[i] = 0.;

    if (i < nc * nc)
    {

      kn[i]    = 0.0;
      kt[i]    = 0.0;
      kninv[i] = 0.0;


      if (i < n)
      {
        wc[i]    = 0.0;
        zc[i]    = 0.;
        reaction[i]     = 0.;
        velocity[i]     = 0.;
        znum1[i] = 0.;
        wnum1[i] = 0.;
        wt[i]    = 0.;
        maxzt[i] = 0.;

        if (i < nc)
        {
          maxwt[i] = 0.;
          zt[i]    = 0.;
          knz[i]   = 0.;
          ktz[i]   = 0.;
          wf[i]    = 0.;
          wtnc[i]  = 0.;
        }

      }

    }
  }


  for (i = 0; i < n; i++)
  {

    if (fabs(vec[i * n + i]) < DBL_EPSILON)
    {

      if (verbose > 0)
        printf("\n Warning nul diagonal term in M matrix \n");

      free(k);
      free(DPO);
      free(kf);
      free(kfinv);
      free(kninv);
      free(kn);
      free(kt);
      free(kinvwden1);
      free(kzden1);
      free(wc);
      free(zc);
      free(znum1);
      free(wnum1);
      free(wt);
      free(maxzt);
      free(knz);
      free(wtnc);
      free(ktz);
      free(wf);
      free(maxwt);
      free(zt);
      free(vectnt);
      free(ddln);
      free(ddlt);

      *info = 3;

      return;


    }
    else
    {

      k[i + n * i] = k_latin / vec[i * n + i];
      vectnt[i] = i + 1;

    }

  }


  for (i = 0; i < nc; i++)
  {
    ddln[i] = vectnt[2 * i];
    if (i != 0) ddlt[i] = vectnt[2 * i - 1];
    else ddlt[i] = 0;

  }


  for (i = 0; i < nc; i++)
  {
    kn[i + nc * i] = k[ddln[i] + n * ddln[i]];
    kt[i + nc * i] = k[ddlt[i] + n * ddlt[i]];
  }




  taillen = sizeof(ddln) / sizeof(ddln[0]);
  taillet = sizeof(ddlt) / sizeof(ddlt[0]);

  idim = 1 +  taillen / taillet;

  taille = 0;
  for (i = 0; i < n; i++)
    taille = sizeof(qq[i]) + taille;

  taille = taille / sizeof(qq[0]);
  nbno = taille / idim;


  for (i = 0; i < nc; i++)
  {
    kf[ddln[i] + n * ddln[i]] = kn[i + nc * i];
    kf[ddlt[i] + n * ddlt[i]] = kt[i + nc * i];
  }


  for (i = 0; i < n; i++)
  {
    kfinv[i + n * i] = 1. / kf[i + n * i];

    if (i < nc)
      kninv[i + nc * i] = 1. / kt[i + nc * i];

  }


  for (i = 0; i < n; i++)
    for (j = 0; j < n; j++)
      DPO[i + n * j] = vec[j * n + i] + kfinv[i + n * j];



  DPOTRF(LA_UP, n, DPO , n, &info2);

  if (info2 != 0)
  {
    if (verbose > 0)
      printf("\n Matter with Cholesky factorization \n");

    free(k);
    free(DPO);
    free(kf);
    free(kfinv);
    free(kninv);
    free(kn);
    free(kt);
    free(kinvwden1);
    free(kzden1);
    free(wc);
    free(zc);
    free(znum1);
    free(wnum1);
    free(wt);
    free(maxzt);
    free(knz);
    free(wtnc);
    free(ktz);
    free(wf);
    free(maxwt);
    free(zt);
    free(vectnt);
    free(ddln);
    free(ddlt);

    *info = 2;
    return;
  }

  /*                Iteration loops                  */


  iter1 = 0;
  err1  = 1.;



  while ((iter1 < itt) && (err1 > errmax))
  {

    /*               Linear stage (zc,wc) -> (z,w)         */

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, zc, incx, beta, wc, incy);

    cblas_dcopy(n, qq, incx, znum1, incy);

    alpha = -1.;
    cblas_dscal(n , alpha , znum1 , incx);

    alpha = 1.;
    cblas_daxpy(n, alpha, wc, incx, znum1, incy);

    nrhs = 1;
    DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77);

    DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77);

    cblas_dcopy(n, znum1, incx, reaction, incy);

    alpha = -1.;
    beta = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, reaction, incx, beta, wc, incy);

    cblas_dcopy(n, wc, incx, velocity, incy);



    /*               Local stage (z,w)->(zc,wc)          */


    for (i = 0; i < n; i++)
    {
      zc[i] = 0.;
      wc[i] = 0.0;
    }


    /*          Normal party                           */



    for (i = 0; i < nc; i++)
    {
      knz0 = 0.;
      for (kk = 0; kk < nc; kk++)
      {
        knz[i] = kt[i + nc * kk] * velocity[ddlt[kk]] + knz0;
        knz0 = knz[i];
      }

      zt[i] = reaction[ddlt[i]] - knz[i];

      if (zt[i] > 0.0)
      {
        zc[ddlt[i]] = zt[i];
        maxzt[i] = 0.0;
      }
      else
      {
        zc[ddlt[i]] = 0.0;
        maxzt[i] = -zt[i];
      }
    }

    for (i = 0; i < nc; i++)
    {
      zc0 = 0.;
      ktz0 = 0.;
      for (j = 0; j < nc; j++)
      {
        wc[ddlt[i]] = kninv[i + nc * j] * maxzt[j] + zc0;
        zc0 = wc[ddlt[i]];
        ktz[i] = kn[i + nc * j] * velocity[ddln[j]] + ktz0;
        ktz0 =  ktz[i];
      }
      wf[i] = reaction[ddln[i]] - ktz[i];
    }


    /*             Loop other nodes              */


    for (ino = 0; ino < nbno; ino++)
    {
      ddl  = ddln[ino];
      nt   = fabs(wf[ino]);


      /*          Tangential vector              */



      if (nt < 1.e-8) tc = 0.;
      else tc = wf[ino] / nt;



      /*               Tangentiel component             */


      wn = zc[ddlt[ino]];

      aa = nt - mu[ino] * wn;

      if (aa > 0.0)
      {
        maxa = aa;
      }
      else
      {
        maxa = 0.0;
      }

      wc[ddl] = (maxa / (-1 * kn[ino + nc * ino])) * tc;

      aa = -nt + mu[ino] * wn;

      if (aa > 0.0)
      {
        maxa = aa;
      }
      else
      {
        maxa = 0.0;
      }

      zc[ddl] = (mu[ino] * wn - maxa) * tc;

    }

    /*               Convergence criterium                */



    cblas_dcopy(n, reaction, incx, znum1, incy);

    alpha = -1.;
    cblas_daxpy(n, alpha, zc, incx, znum1, incy);

    cblas_dcopy(n, velocity, incx, wnum1, incy);

    cblas_daxpy(n, alpha, wc, incx, wnum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wnum1, incx, beta, znum1, incy);

    num11  = 0.;
    alpha  = 1.;
    beta = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    num11 = cblas_ddot(n, wnum1, incx, znum1, incy);

    cblas_dcopy(n, reaction, incx, znum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, velocity, incx, beta, znum1, incy);

    alpha  = 1.;
    beta   = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    den11  = cblas_ddot(n, wnum1, incx, znum1, incy);

    cblas_dcopy(n, zc, incx, znum1, incy);

    alpha  = 1.;
    beta   = 1.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wc, incx, beta, znum1, incy);

    alpha  = 1.;
    beta   = 0.;
    cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy);

    den22  = cblas_ddot(n, znum1, incx, wnum1, incy);

    err0   = num11 / (den11 + den22);

    err1   = sqrt(err0);

    options->iparam[1] = iter1;
    options->dparam[1] = err1;

    iter1   = iter1 + 1;


  }


  if (err1 > errmax)
  {

    if (verbose > 0)
      printf("No convergence after %d iterations, the residue is %g\n", iter1, err1);

    *info = 1;
  }
  else
  {

    if (verbose > 0)
      printf("Convergence after %d iterations, the residue is %g \n", iter1, err1);

    *info = 0;
  }

  free(k);
  free(DPO);
  free(kf);
  free(kfinv);
  free(kninv);
  free(kn);
  free(kt);
  free(kinvwden1);
  free(kzden1);
  free(wc);
  free(zc);
  free(znum1);
  free(wnum1);
  free(wt);
  free(maxzt);
  free(knz);
  free(wtnc);
  free(ktz);
  free(wf);
  free(maxwt);
  free(zt);
  free(vectnt);
  free(ddln);
  free(ddlt);



}
int _globalLineSearchSparseGP(
  GlobalFrictionContactProblem *problem,
  AlartCurnierFun3x3Ptr computeACFun3x3,
  double *solution,
  double *direction,
  double *mu,
  double *rho,
  double *F,
  double *psi,
  CSparseMatrix *J,
  double *tmp,
  double alpha[1],
  unsigned int maxiter_ls)
{
  double inf = 1e10;
  double alphamin = 1e-16;
  double alphamax = inf;

  double m1 = 0.01, m2 = 0.99;

  unsigned int n = (unsigned)NM_triplet(problem->M)->m;

  unsigned int m = problem->H->size1;

  unsigned int problem_size = n+2*m;

  // Computation of q(t) and q'(t) for t =0

  double q0 = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);

  //  tmp <- J * direction
  cblas_dscal(problem_size, 0., tmp, 1);
  cs_gaxpy(J, direction, tmp);

  double dqdt0 = cblas_ddot(problem_size, psi, 1, tmp, 1);
  DEBUG_PRINTF("dqdt0=%e\n",dqdt0);
  DEBUG_PRINTF("q0=%e\n",q0);

  for(unsigned int iter = 0; iter < maxiter_ls; ++iter)
  {

    // tmp <- alpha*direction+solution
    cblas_dcopy(problem_size, solution, 1, tmp, 1);
    cblas_daxpy(problem_size, alpha[0], direction, 1, tmp, 1);

    ACPsi(
      problem,
      computeACFun3x3,
      tmp,  /* v */
      tmp+problem->M->size0+problem->H->size1, /* P */
      tmp+problem->M->size0, /* U */
      rho, psi);

    double q  = 0.5 * cblas_ddot(problem_size, psi, 1, psi, 1);

    assert(q >= 0);

    double slope = (q - q0) / alpha[0];

    int C1 = (slope >= m2 * dqdt0);
    int C2 = (slope <= m1 * dqdt0);

    DEBUG_PRINTF("C1=%i\t C2=%i\n",C1,C2);
    if(C1 && C2)
    {
      numerics_printf_verbose(1, "---- GFC3D - NSN_AC - global line search success. Number of ls iteration = %i  alpha = %.10e, q = %.10e",
                              iter,
                              alpha[0], q);
      
      return 0;

    }
    else if(!C1)
    {
      alphamin = alpha[0];
    }
    else
    {
      // not(C2)
      alphamax = alpha[0];
    }

    if(alpha[0] < inf)
    {
      alpha[0] = 0.5 * (alphamin + alphamax);
    }
    else
    {
      alpha[0] = alphamin;
    }

  }
  numerics_printf_verbose(1,"---- GFC3D - NSN_AC - global line search unsuccessful. Max number of ls iteration reached  = %i  with alpha = %.10e",
                  maxiter_ls, alpha[0]);
  

  return -1;
}
void fc3d_ProjectedGradientOnCylinder(FrictionContactProblem* problem, double *reaction, double *velocity, int* info, SolverOptions* options)
{
  /* int and double parameters */
  int* iparam = options->iparam;
  double* dparam = options->dparam;
  /* Number of contacts */
  int nc = problem->numberOfContacts;
  double* q = problem->q;
  NumericsMatrix* M = problem->M;
  /* Dimension of the problem */
  int n = 3 * nc;
  /* Maximum number of iterations */
  int itermax = iparam[0];
  /* Tolerance */
  double tolerance = dparam[0];




  /*****  Projected Gradient iterations *****/
  int j, iter = 0; /* Current iteration number */
  double error = 1.; /* Current error */
  int hasNotConverged = 1;
  int contact; /* Number of the current row of blocks in M */
  int nLocal = 3;
  dparam[0] = dparam[2]; // set the tolerance for the local solver
  double * velocitytmp = (double *)malloc(n * sizeof(double));

  double rho = 0.0;
  int isVariable = 0;
  double rhoinit, rhomin;
  if (dparam[3] > 0.0)
  {
    rho = dparam[3];
  }
  else
  {
    /* Variable step in fixed*/
    isVariable = 1;
    printf("Variable step (line search) in Projected Gradient iterations\n");
    rhoinit = dparam[3];
    rhomin = dparam[4];
  }

  double * reactionold;
  double * direction;
  if (isVariable)
  {
    reactionold = (double *)malloc(n * sizeof(double));
    direction = (double *)malloc(n * sizeof(double));
  }
  double alpha = 1.0;
  double beta = 1.0;

  /*   double minusrho  = -1.0*rho; */

  if (!isVariable)
  {
    while ((iter < itermax) && (hasNotConverged > 0))
    {
      ++iter;
      cblas_dcopy(n , q , 1 , velocitytmp, 1);
      prodNumericsMatrix(n, n, alpha, M, reaction, beta, velocitytmp);
      // projection for each contact
      cblas_daxpy(n, -1.0, velocitytmp, 1, reaction , 1);
      for (contact = 0 ; contact < nc ; ++contact)
        projectionOnCylinder(&reaction[ contact * nLocal],
                             options->dWork[contact]);

#ifdef VERBOSE_DEBUG

      printf("reaction before LS\n");
      for (contact = 0 ; contact < nc ; ++contact)
      {
        for (j = 0; j < 3; j++)
          printf("reaction[%i] = %le\t", 3 * contact + j, reaction[3 * contact + j]);
        printf("\n");
      }
      printf("velocitytmp before LS\n");
      for (contact = 0 ; contact < nc ; ++contact)
      {
        for (j = 0; j < 3; j++)
          printf("velocitytmp[%i] = %le\t", 3 * contact + j, velocitytmp[3 * contact + j]);
        printf("\n");
      }
#endif
      /* **** Criterium convergence **** */
      fc3d_Tresca_compute_error(problem, reaction , velocity, tolerance, options, &error);

      if (options->callback)
      {
        options->callback->collectStatsIteration(options->callback->env, nc * 3, 
                                        reaction, velocity, 
                                        error, NULL);
      }

      if (verbose > 0)
        printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error);

      if (error < tolerance) hasNotConverged = 0;
      *info = hasNotConverged;
    }
  }
  else
  {
    rho =  rhoinit;


    cblas_dcopy(n , q , 1 , velocitytmp, 1);
    prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp);

    cblas_daxpy(n, rho, velocitytmp, 1, reaction, 1);

    for (contact = 0 ; contact < nc ; ++contact)
      projectionOnCylinder(&reaction[contact * nLocal],
                           options->dWork[contact]);
    cblas_dcopy(n , q , 1 , velocitytmp, 1);
    prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp);

    double oldcriterion = cblas_ddot(n, reaction, 1, velocitytmp, 1);
#ifdef VERBOSE_DEBUG
    printf("oldcriterion =%le \n", oldcriterion);
#endif


    while ((iter < itermax) && (hasNotConverged > 0))
    {
      ++iter;
      // store the old reaction
      cblas_dcopy(n , reaction , 1 , reactionold , 1);
      // compute the direction
      cblas_dcopy(n , q , 1 , velocitytmp, 1);
      prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp);
      cblas_dcopy(n, velocitytmp, 1, direction, 1);

      // start line search
      j = 0;

      if (rho <= 100 * rhoinit) rho = 10.0 * rho;

      double newcriterion = 1e24;
      do
      {





        cblas_dcopy(n , reactionold , 1 , reaction , 1);
        cblas_daxpy(n, rho, direction, 1, reaction , 1) ;
#ifdef VERBOSE_DEBUG
        printf("LS iteration %i step 0 \n", j);
        printf("rho = %le \n", rho);
        for (contact = 0 ; contact < nc ; ++contact)
        {
          for (int k = 0; k < 3; k++)
            printf("reaction[%i] = %le\t",
                   3 * contact + k, reaction[3 * contact + k]);
          printf("\n");
        }
#endif
        for (contact = 0 ; contact < nc ; ++contact)
          projectionOnCylinder(&reaction[contact * nLocal],
                               options->dWork[contact]);
        /*          printf("options->dWork[%i] = %le\n",contact, options->dWork[contact]  );} */
#ifdef VERBOSE_DEBUG
        printf("LS iteration %i step 1 after projection\n", j);
        for (contact = 0 ; contact < nc ; ++contact)
        {
          for (int k = 0; k < 3; k++)
            printf("reaction[%i] = %le\t",
                   3 * contact + k, reaction[3 * contact + k]);
          printf("\n");
        }
#endif
        cblas_dcopy(n , q , 1 , velocitytmp, 1);
        prodNumericsMatrix(n, n, 1.0, M, reaction, 1.0, velocitytmp);

#ifdef VERBOSE_DEBUG
        printf("LS iteration %i step 3 \n", j);
        for (contact = 0 ; contact < nc ; ++contact)
        {
          for (int k = 0; k < 3; k++)
            printf("velocitytmp[%i] = %le\t", 3 * contact + k, velocitytmp[3 * contact + k]);
          printf("\n");
        }
#endif


        newcriterion = cblas_ddot(n, reaction, 1, velocitytmp, 1);

#ifdef VERBOSE_DEBUG
        printf("LS iteration %i newcriterion =%le\n", j, newcriterion);
#endif
        if (rho > rhomin)
        {
          rho = rhomin;
          break;
        }

        rho = 0.5 * rho;
      }
      while (newcriterion > oldcriterion &&
             ++j <= options->iparam[2]);
      oldcriterion = newcriterion;

      /* **** Criterium convergence **** */
      fc3d_Tresca_compute_error(problem, reaction , velocity, tolerance, options, &error);

      if (verbose > 0)
        printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error);

      if (error < tolerance) hasNotConverged = 0;
      *info = hasNotConverged;
    }
  }





  printf("----------------------------------- FC3D - Projected Gradient On Cylinder (PGoC)- #Iteration %i Final Residual = %14.7e\n", iter, error);
  dparam[0] = tolerance;
  dparam[1] = error;
  free(velocitytmp);
  if (isVariable)
  {
    free(reactionold);
    free(direction);
  }
}
void variationalInequality_ExtraGradient(VariationalInequality* problem, double *x, double *w, int* info, SolverOptions* options)
{
  /* /\* int and double parameters *\/ */
  int* iparam = options->iparam;
  double* dparam = options->dparam;
  /* Number of contacts */
  int n = problem->size;
  /* Maximum number of iterations */
  int itermax = iparam[0];
  /* Tolerance */
  double tolerance = dparam[0];


  /*****  Fixed point iterations *****/
  int iter = 0; /* Current iteration number */
  double error = 1.; /* Current error */
  int hasNotConverged = 1;
  dparam[0] = dparam[2]; // set the tolerance for the local solver


  double * xtmp = (double *)malloc(n * sizeof(double));
  double * wtmp = (double *)malloc(n * sizeof(double));

  double rho = 0.0, rho_k =0.0;
  int isVariable = 0;

  if (dparam[3] > 0.0)
  {
    rho = dparam[3];
    if (verbose > 0)
    {
      printf("----------------------------------- VI - Extra Gradient (EG) - Fixed stepsize with  rho = %14.7e \n", rho);
    }
  }
  else
  {
    /* Variable step in iterations*/
    isVariable = 1;
    rho = -dparam[3];
    if (verbose > 0)
    {
      printf("----------------------------------- VI - Extra Gradient (EG) - Variable stepsize with starting rho = %14.7e \n", rho);
    }

  }

  /* Variable for Line_search */
  int success =0;
  double error_k, light_error_sum =0.0;
  int ls_iter = 0;
  int ls_itermax = 10;
  double tau=dparam[4], tauinv=dparam[5], L= dparam[6], Lmin = dparam[7];
  double a1=0.0, a2=0.0;
  double * x_k =0;
  double * w_k =0;

  if (isVariable)
  {
    x_k = (double *)malloc(n * sizeof(double));
    w_k = (double *)malloc(n * sizeof(double));
  }

  //isVariable=0;
  if (!isVariable)
  {
    /*   double minusrho  = -1.0*rho; */
    while ((iter < itermax) && (hasNotConverged > 0))
    {
      ++iter;

      /* xtmp <- x  */
      cblas_dcopy(n , x , 1 , xtmp, 1);

      /* wtmp <- F(xtmp) */
      problem->F(problem, n, xtmp,wtmp);

      /* xtmp <- xtmp - F(xtmp) */
      cblas_daxpy(n, -1.0, wtmp , 1, xtmp , 1) ;

      /* wtmp <-  ProjectionOnX(xtmp) */
      problem->ProjectionOnX(problem,xtmp,wtmp);

      /* x <- x - wtmp */
      cblas_daxpy(n, -1.0, wtmp , 1, x , 1) ;

      /* x <-  ProjectionOnX(x) */
      cblas_dcopy(n , xtmp , 1 , x, 1);

      problem->ProjectionOnX(problem,xtmp,x);


      /* problem->F(problem,x,w); */
      /* cblas_daxpy(n, -1.0, w , 1, x , 1) ; */
      /* cblas_dcopy(n , x , 1 , xtmp, 1); */
      /* problem->ProjectionOnX(problem,xtmp,x); */

      /* **** Criterium convergence **** */
      if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_FULL )
      {
        variationalInequality_computeError(problem, x , w, tolerance, options, &error);
      }
      else if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_LIGHT )
      {
        cblas_dcopy(n, xtmp, 1,x , 1) ;
        cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ;
        light_error_sum = cblas_dnrm2(n,xtmp,1);
        double norm_x= cblas_dnrm2(n,x,1);
        if (fabs(norm_x) > DBL_EPSILON)
          light_error_sum /= norm_x;
        error=light_error_sum;
      }

      if (options->callback)
      {
        options->callback->collectStatsIteration(options->callback->env, n,
                                        x, w,
                                        error, NULL);
      }

      if (verbose > 0)
      {
        printf("----------------------------------- VI - Extra Gradient (EG) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error);
      }
      if (error < tolerance) hasNotConverged = 0;
      *info = hasNotConverged;
    }
  }

  if (isVariable)
  {
    if (iparam[1]==0)/* Armijo rule with Khotbotov ratio (default)   */
    {
      while ((iter < itermax) && (hasNotConverged > 0))
      {
        ++iter;


        /* Store the error */
        error_k = error;

        /* x_k <-- x store the x at the beginning of the iteration */
        cblas_dcopy(n , x , 1 , x_k, 1);

        problem->F(problem, n, x, w_k);

        ls_iter = 0 ;
        success =0;
        rho_k=rho / tau;

        while (!success && (ls_iter < ls_itermax))
        {
          /* if (iparam[3] && ls_iter !=0) rho_k = rho_k * tau * min(1.0,a2/(rho_k*a1)); */
          /* else */ rho_k = rho_k * tau ;

          /* x <- x_k  for the std approach*/
          if (iparam[2]==0) cblas_dcopy(n, x_k, 1, x , 1) ;

          /* x <- x - rho_k*  w_k */
          cblas_daxpy(n, -rho_k, w_k , 1, x , 1) ;

          /* xtmp <-  ProjectionOnX(x) */
          problem->ProjectionOnX(problem,x,xtmp);
          problem->F(problem,n,xtmp,w);

          DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("xtmp[%i]=%12.8e\t",i,xtmp[i]);
              printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);});
          /* velocitytmp <- velocity */
          /* cblas_dcopy(n, w, 1, wtmp , 1) ; */

          /* velocity <- velocity - velocity_k   */
          cblas_daxpy(n, -1.0, w_k , 1, w , 1) ;

          /* a1 =  ||w - w_k|| */
          a1 = cblas_dnrm2(n, w, 1);
          DEBUG_PRINTF("a1 = %12.8e\n", a1);

          /* xtmp <- x */
          cblas_dcopy(n, xtmp, 1,x , 1) ;

          /* xtmp <- x - x_k   */
          cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ;

          /* a2 =  || x - x_k || */
          a2 = cblas_dnrm2(n, xtmp, 1) ;
          DEBUG_PRINTF("a2 = %12.8e\n", a2);

          DEBUG_PRINTF("test rho_k*a1 < L * a2 = %e < %e\n", rho_k*a1 , L * a2 ) ;
          success = (rho_k*a1 < L * a2)?1:0;

          /* printf("rho_k = %12.8e\t", rho_k); */
          /* printf("a1 = %12.8e\t", a1); */
          /* printf("a2 = %12.8e\t", a2); */
          /* printf("norm x = %12.8e\t",cblas_dnrm2(n, x, 1) ); */
          /* printf("success = %i\n", success); */

          ls_iter++;
        }

        /* velocitytmp <- q  */
        /* cblas_dcopy(n , q , 1 , velocitytmp, 1); */
        /* prodNumericsMatrix(n, n, alpha, M, reaction, beta, velocitytmp); */

        problem->F(problem, n, x,wtmp);

        /* x <- x - rho_k*  wtmp */
        cblas_daxpy(n, -rho_k, wtmp , 1, x , 1) ;

        /* wtmp <-  ProjectionOnX(xtmp) */
        cblas_dcopy(n , x , 1 , xtmp, 1);
        problem->ProjectionOnX(problem,xtmp,x);
        DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++)
                       {
                         printf("x[%i]=%12.8e\t",i,x[i]);    printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);
                       }
          );



        /* **** Criterium convergence **** */
        if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_FULL )
        {
          variationalInequality_computeError(problem, x , w, tolerance, options, &error);
        }
        else if (options->iparam[SICONOS_VI_ERROR_EVALUATION] == SICONOS_VI_ERROR_EVALUATION_LIGHT )
        {
          cblas_dcopy(n, xtmp, 1,x , 1) ;
          cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ;
          light_error_sum = cblas_dnrm2(n,xtmp,1);
          double norm_x= cblas_dnrm2(n,x,1);
          if (fabs(norm_x) > DBL_EPSILON)
            light_error_sum /= norm_x;
          error=light_error_sum;
        }

        DEBUG_PRINTF("error = %12.8e\t error_k = %12.8e\n",error,error_k);
        /*Update rho*/
        if ((rho_k*a1 < Lmin * a2) && (error < error_k))
        {
          rho =rho_k*tauinv;
        }
        else
          rho =rho_k;


        if (verbose > 0)
        {
          printf("----------------------------------- VI - Extra Gradient (EG) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error);
        }
        if (error < tolerance) hasNotConverged = 0;
        *info = hasNotConverged;
      }
    }// end iparam[1]==0
/* Linesearch */
int linesearch2_Armijo(int n, double *z, double psi_k, double descentCondition)
{

  /* IN :
     psi_k (merit function for current iteration)
     jacobian_psi_k (jacobian of the merit function)
     dk: descent direction

     OUT: tk, z
  */

  double m1 = 0.1;
  double tk = 1;
  double tkl, tkr, tkaux;
  int incx = 1, incy = 1;
  double merit, merit_k;
  double tmin = 1e-14;
  double qp_tk;

  /*  cblas_dcopy(sN, z, incx,sz2,incx);*/

  /* z1 = z0 + dir */
  /*  cblas_daxpy(n , 1.0 , sdir_descent , incx , z , incy );*/

  tk = 3.25;


  while (tk > tmin)
  {

    /* Computes merit function = 1/2*norm(phi(z_{k+1}))^2 */
    cblas_dcopy(sN, z, incx, sz2, incx);
    cblas_daxpy(n , tk , sdir_descent , incx , sz2 , incy);


    (*sFphi)(n, sz2, sphi_z, 0);
    merit =  cblas_dnrm2(n, sphi_z , incx);
    merit = 0.5 * merit * merit;
    merit_k = psi_k + m1 * tk * descentCondition;
    if (merit < merit_k)
    {
      tkl = 0;
      tkr = tk;

      /*calcul merit'(tk)*/
      (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
      /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
      cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx);
      qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1);

      if (qp_tk > 0)
      {
        while (fabs(tkl - tkr) > tmin)
        {
          tkaux = 0.5 * (tkl + tkr);
          cblas_dcopy(sN, z, incx, sz2, incx);
          cblas_daxpy(n , tkaux , sdir_descent , incx , sz2 , incy);
          /*calcul merit'(tk)*/
          (*sFphi)(n, sz2, sphi_z, 0);
          (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1);
          /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
          cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx);
          qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1);
          if (qp_tk > 0)
          {
            tkr = tkaux;
          }
          else
          {
            tkl = tkaux;
          }
        }
      }

      /* printf("merit = %e, merit_k=%e,tk= %e,tkaux=%e \n",merit,merit_k,tk,tkaux);*/
      cblas_dcopy(sN, sz2, incx, z, incx);
      break;
    }
    tk = tk * 0.5;
  }
  if (tk <= tmin)
  {
    cblas_dcopy(sN, sz2, incx, z, incx);
    printf("NonSmoothNewton::linesearch2_Armijo warning, resulting tk=%e < tmin, linesearch stopped.\n", tk);
    return 0;

  }
  return 1;

}
Exemple #24
0
double search_Armijo_standalone(int n, double* theta, double preRHS, search_data* ls_data)
{
  assert(ls_data->alpha0 > 0.0);
  assert(ls_data->alpha0 > ls_data->alpha_min);
  double alpha = ls_data->alpha0;
  double theta_iter = *theta, theta_ref = *theta;
  double* z = ls_data->z;
  double* zc = ls_data->zc;
  double* F = ls_data->F;
  double* F_merit = ls_data->F_merit;
  double* desc_dir = ls_data->desc_dir;
  void* data = ls_data->data;
  bool arcsearch = ls_data->searchtype == ARCSEARCH;
  void* set = ls_data->set;
  double RHS;

  armijo_extra_params* aep = (armijo_extra_params*) ls_data->extra_params;
  assert(aep);
  preRHS *= aep->gamma;

  while (alpha >= ls_data->alpha_min)
  {
    DEBUG_PRINTF("search_Armijo :: alpha %g, ls_data->alpha_min %g \n", alpha, ls_data->alpha_min);

     // desc_dir contains the direction d
     cblas_dcopy(n, z, 1, zc, 1);
     cblas_daxpy(n, alpha, desc_dir, 1, zc, 1);     //  z + alpha*d --> z
     if (arcsearch)
     {
       project_on_set(n, zc, set);
       /* we use F as a work vector here */
       cblas_dcopy(n, z, 1, F, 1);
       cblas_daxpy(n, -1.0, zc, 1, F, 1); /* F = z(0) - z(alpha) */
       /* warning desc_dir = -JacMerit !*/
       double dotprod = cblas_ddot(n, desc_dir, 1, F, 1);
       if (dotprod > 0.0)
         RHS = ls_data->sigma*dotprod;
       else
         RHS = -alpha*ls_data->sigma*theta_ref;
     }
     else
     {
       RHS = alpha*preRHS;
     }

     // compute new F_merit
     ls_data->compute_F(data, zc, F);
     ls_data->compute_F_merit(data, zc, F, F_merit);

     DEBUG_PRINT("z ");
     DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i)
         { DEBUG_PRINTF("% 2.2e ", zc[i]) }
         DEBUG_PRINT("\n"));
 
     DEBUG_PRINT("F ");
     DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i)
         { DEBUG_PRINTF("% 2.2e ", F[i]) }
         DEBUG_PRINT("\n"));
 
     DEBUG_PRINT("F_merit ");
     DEBUG_EXPR_WE(for (unsigned int i = 0; i < n; ++i)
         { DEBUG_PRINTF("% 2.2e ", F_merit[i]) }
         DEBUG_PRINT("\n"));
 
     theta_iter = 0.5 * cblas_ddot(n, F_merit, 1, F_merit, 1);
 
     DEBUG_PRINTF("search_Armijo :: alpha %g\n", alpha);
     DEBUG_PRINTF("search_Armijo :: theta_iter %.*e ; theta_ref %.*e  \n", DECIMAL_DIG, theta_iter, DECIMAL_DIG, theta_ref);
 
     // acceptance test
     if (theta_iter <= theta_ref + RHS)
     {
       if (verbose > 1)
         printf("search_Armijo :: alpha %g\n", alpha);
       break;
     }
     else
     {
       // alpha too large, decrease it
       alpha /= 2.0;
     }
  }
  *theta = theta_iter;
  return alpha;
}
int nonSmoothNewtonNeigh(int n, double* z, NewtonFunctionPtr* phi, NewtonFunctionPtr* jacobianPhi, int* iparam, double* dparam)
{


  int itermax = iparam[0]; // maximum number of iterations allowed
  int iterMaxWithSameZ = itermax / 4;
  int niter = 0; // current iteration number
  double tolerance = dparam[0];
  /*   double coef; */
  sFphi = phi;
  sFjacobianPhi = jacobianPhi;
  //  verbose=1;
  if (verbose > 0)
  {
    printf(" ============= Starting of Newton process =============\n");
    printf(" - tolerance: %14.7e\n - maximum number of iterations: %i\n", tolerance, itermax);
  }

  int incx = 1;
  /*   int n2 = n*n; */
  int infoDGESV;

  /** merit function and its jacobian */
  double psi_z;

  /** The algorithm is alg 4.1 of the paper of Kanzow and Kleinmichel, "A new class of semismooth Newton-type methods
      for nonlinear complementarity problems", in Computational Optimization and Applications, 11, 227-251 (1998).

      We try to keep the same notations
  */

  double rho = 1e-8;
  double descentCondition, criterion, norm_jacobian_psi_z, normPhi_z;
  double p = 2.1;
  double terminationCriterion = 1;
  double norm;
  int findNewZ, i, j, NbLookingForANewZ;
  /*   int naux=0; */
  double aux = 0;
  /*   double aux1=0; */
  int ii;
  int resls = 1;
  /*   char c; */
  /*  double * oldz; */
  /*  oldz=(double*)malloc(n*sizeof(double));*/

  NbLookingForANewZ = 0;

  /** Iterations ... */
  while ((niter < itermax) && (terminationCriterion > tolerance))
  {
    scmp++;
    ++niter;
    /** Computes phi and its jacobian */
    if (sZsol)
    {
      for (ii = 0; ii < sN; ii++)
        szzaux[ii] = sZsol[ii] - z[ii];
      printf("dist zzsol %.32e.\n", cblas_dnrm2(n, szzaux, 1));
    }

    (*sFphi)(n, z, sphi_z, 0);
    (*sFjacobianPhi)(n, z, sjacobianPhi_z, 1);
    /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */
    cblas_dgemv(CblasColMajor,CblasTrans, n, n, 1.0, sjacobianPhi_z, n, sphi_z, incx, 0.0, sgrad_psi_z, incx);
    norm_jacobian_psi_z = cblas_dnrm2(n, sgrad_psi_z, 1);

    /* Computes norm2(phi) */
    normPhi_z = cblas_dnrm2(n, sphi_z, 1);
    /* Computes merit function */
    psi_z = 0.5 * normPhi_z * normPhi_z;

    if (normPhi_z < tolerance)
    {
      /*it is the solution*/
      terminationCriterion = tolerance / 2.0;
      break;
    }

    if (verbose > 0)
    {
      printf("Non Smooth Newton, iteration number %i, norm grad psi= %14.7e , psi = %14.7e, normPhi = %e .\n", niter, norm_jacobian_psi_z, psi_z, normPhi_z);
      printf(" -----------------------------------------------------------------------\n");
    }

    NbLookingForANewZ++;

    if (niter > 2)
    {
      if (10 * norm_jacobian_psi_z < tolerance || !resls || NbLookingForANewZ > iterMaxWithSameZ)
      {
        NbLookingForANewZ = 0;
        resls = 1;
        /*   if (NbLookingForANewZ % 10 ==1 && 0){
          printf("Try NonMonotomnelineSearch\n");
          cblas_dcopy(n,sgrad_psi_z,1,sdir_descent,1);
          cblas_dscal( n , -1.0 ,sdir_descent,incx);
          NonMonotomnelineSearch( z,  phi, 10);
          continue;
        }
        */

        /* FOR DEBUG ONLY*/
        if (sZsol)
        {
          printf("begin plot prev dir\n");
          plotMerit(z, 0, 0);
          printf("end\n");
          /*     gets(&c);*/
          (*sFphi)(n, sZsol, szaux, 0);
          printf("value psi(zsol)=%e\n", cblas_dnrm2(n, szaux, 1));
          cblas_dcopy(n, sZsol, incx, szaux, incx);
          cblas_daxpy(n , -1 , z , 1 , szaux , 1);
          printf("dist to sol %e \n", cblas_dnrm2(n, szaux, 1));
          for (ii = 0; ii < n; ii++)
            sdir_descent[ii] = sZsol[ii] - z[ii];

          aux = norm;
          norm = 1;
          printf("begin plot zzsol dir\n");
          plotMerit(z, 0, 0);
          printf("end\n");
          /*     gets(&c);*/
          norm = aux;
        }

        printf("looking for a new Z...\n");
        /*may be a local minimal*/
        /*find a gradiant going out of this cul-de-sac.*/
        norm = n / 2;
        findNewZ = 0;
        for (j = 0; j < 20; j++)
        {

          for (i = 0; i < n; i++)
          {
            if (sZsol)
            {
              /* FOR DEBUG ONLY*/
              (*sFphi)(n, sZsol, sphi_zaux, 0);
              norm = cblas_dnrm2(n, sphi_zaux, 1);
              printf("Norm of the sol %e.\n", norm);

              for (ii = 0; ii < n; ii++)
                sdir_descent[ii] = sZsol[ii] - z[ii];
              norm = 1;
            }
            else
            {
              for (ii = 0; ii < n; ii++)
              {
                sdir_descent[ii] = 1.0 * rand();
              }
              cblas_dscal(n, 1 / cblas_dnrm2(n, sdir_descent, 1), sdir_descent, incx);
              cblas_dscal(n, norm, sdir_descent, incx);
            }
            cblas_dcopy(n, z, incx, szaux, incx);
            // cblas_dscal(n,0.0,zaux,incx);
            /* zaux = z + dir */
            cblas_daxpy(n , norm , sdir_descent , 1 , szaux , 1);
            /* Computes the jacobian of the merit function, jacobian_psi_zaux = transpose(jacobianPhi_zaux).phi_zaux */
            (*sFphi)(n, szaux, sphi_zaux, 0);
            (*sFjacobianPhi)(n, szaux, sjacobianPhi_zaux, 1);

            /* FOR DEBUG ONLY*/
            if (sZsol)
            {
              aux = cblas_dnrm2(n, sphi_zaux, 1);
              printf("Norm of the sol is now %e.\n", aux);
              for (ii = 0; ii < n; ii++)
                printf("zsol %e zaux %e \n", sZsol[ii], szaux[ii]);
            }


            cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, sjacobianPhi_zaux, n, sphi_zaux, incx, 0.0, sgrad_psi_zaux, incx);
            cblas_dcopy(n, szaux, 1, szzaux, 1);
            cblas_daxpy(n , -1 , z , incx , szzaux , incx);
            /*zzaux must be a descente direction.*/
            /*ie jacobian_psi_zaux.zzaux <0
            printf("jacobian_psi_zaux : \n");*/
            /*cblas_dcopy(n,sdir,incx,sdir_descent,incx);
            plotMerit(z, phi);*/


            aux = cblas_ddot(n, sgrad_psi_zaux, 1, szzaux, 1);
            /*       aux1 = cblas_dnrm2(n,szzaux,1);
            aux1 = cblas_dnrm2(n,sgrad_psi_zaux,1);*/
            aux = aux / (cblas_dnrm2(n, szzaux, 1) * cblas_dnrm2(n, sgrad_psi_zaux, 1));
            /*       printf("aux: %e\n",aux);*/
            if (aux < 0.1 * (j + 1))
            {
              //zaux is the new point.
              findNewZ = 1;
              cblas_dcopy(n, szaux, incx, z, incx);
              break;
            }
          }
          if (findNewZ)
            break;
          if (j == 10)
          {
            norm = n / 2;
          }
          else if (j > 10)
            norm = -2 * norm;
          else
            norm = -norm / 2.0;
        }
        if (! findNewZ)
        {
          printf("failed to find a new z\n");
          /* exit(1);*/
          continue;

        }
        else
          continue;
      }
    }

    /* Stops if the termination criterion is satisfied */
    terminationCriterion = norm_jacobian_psi_z;
    /*      if(terminationCriterion < tolerance){
    break;
    }*/

    /* Search direction calculation
    Find a solution dk of jacobianPhiMatrix.d = -phiVector.
    dk is saved in phiVector.
    */
    cblas_dscal(n , -1.0 , sphi_z, incx);
    DGESV(n, 1, sjacobianPhi_z, n, sipiv, sphi_z, n, &infoDGESV);
    if (infoDGESV)
    {
      printf("DGEV error %d.\n", infoDGESV);
    }
    cblas_dcopy(n, sphi_z, 1, sdir_descent, 1);
    criterion = cblas_dnrm2(n, sdir_descent, 1);
    /*      printf("norm dir descent %e\n",criterion);*/

    /*printf("begin plot descent dir\n");
    plotMerit(z, phi);
    printf("end\n");
          gets(&c);*/

    /*printf("begin plot zzsol dir\n");
    plotMeritToZsol(z,phi);
    printf("end\n");
          gets(&c);*/


    /*
    norm = cblas_dnrm2(n,sdir_descent,1);
    printf("norm desc %e \n",norm);
    cblas_dscal( n , 1/norm , sdir_descent, 1);
    */
    /* descentCondition = jacobian_psi.dk */
    descentCondition = cblas_ddot(n, sgrad_psi_z,  1,  sdir_descent, 1);

    /* Criterion to be satisfied: error < -rho*norm(dk)^p */
    criterion = -rho * pow(criterion, p);
    /*      printf("ddddddd %d\n",scmp);
    if (scmp>100){
    displayMat(sjacobianPhi_z,n,n,n);
    exit(1);
    }*/

//    if ((infoDGESV != 0 || descentCondition > criterion) && 0)
//    {
//      printf("no a desc dir, get grad psy\n");
      /* dk = - jacobian_psi (remind that dk is saved in phi_z) */
//      cblas_dcopy(n, sgrad_psi_z, 1, sdir_descent, 1);
//      cblas_dscal(n , -1.0 , sdir_descent, incx);
      /*DEBUG ONLY*/
      /*printf("begin plot new descent dir\n");
      plotMerit(z);
      printf("end\n");
       gets(&c);*/
//    }
    /*      coef=fabs(norm_jacobian_psi_z*norm_jacobian_psi_z/descentCondition);
    if (coef <1){
    cblas_dscal(n,coef,sdir_descent,incx);
    printf("coef %e norm dir descent is now %e\n",coef,cblas_dnrm2(n,sdir_descent,1));
    }*/


    /* Step-3 Line search: computes z_k+1 */
    /*linesearch_Armijo(n,z,sdir_descent,psi_z, descentCondition, phi);*/
    /*            if (niter == 10){
    printf("begin plot new descent dir\n");
    plotMerit(z);
    printf("end\n");
     gets(&c);
    }*/
    /*      memcpy(oldz,z,n*sizeof(double));*/

    resls = linesearch2_Armijo(n, z, psi_z, descentCondition);
    if (!resls && niter > 1)
    {

      /* displayMat(sjacobianPhi_z,n,n,n);
      printf("begin plot new descent dir\n");
      plotMerit(oldz,psi_z, descentCondition);
      printf("end\n");
      gets(&c);*/
    }


    /*      lineSearch_Wolfe(z, descentCondition, phi,jacobianPhi);*/
    /*      if (niter>3){
    printf("angle between prev dir %e.\n",acos(cblas_ddot(n, sdir_descent,  1,  sPrevDirDescent, 1)/(cblas_dnrm2(n,sdir_descent,1)*cblas_dnrm2(n,sPrevDirDescent,1))));
    }*/
    cblas_dcopy(n, sdir_descent, 1, sPrevDirDescent, 1);

    /*      for (j=20;j<32;j++){
    if (z[j]<0)
    z[j]=0;
    }*/

    /*      if( 1 || verbose>0)
    {
     printf("Non Smooth Newton, iteration number %i, error grad equal to %14.7e , psi value is %14.7e .\n",niter, terminationCriterion,psi_z);
       printf(" -----------------------------------------------------------------------\n");
       }*/
  }

  /* Total number of iterations */
  iparam[1] = niter;
  /* Final error */
  dparam[1] = terminationCriterion;

  /** Free memory*/

  if (verbose > 0)
  {
    if (dparam[1] > tolerance)
      printf("Non Smooth Newton warning: no convergence after %i iterations\n" , niter);

    else
      printf("Non Smooth Newton: convergence after %i iterations\n" , niter);
    printf(" The residue is : %e \n", dparam[1]);
  }

  /*  free(oldz);*/

  if (dparam[1] > tolerance)
    return 1;
  else return 0;
}
void My_daxpy(gsl_vector* y, const gsl_vector* x, double alpha)
{
	cblas_daxpy(y->size, alpha, x->data, x->stride, y->data, y->stride);
}
Exemple #27
0
/* Ref: Weiss, Algorithm 12 BiCGSTAB
 * INPUT
 *   n : dimension of the problem
 *   b [n] : r-h-s vector
 *   atimes (int n, static double *x, double *b, void *param) :
 *        calc matrix-vector product A.x = b.
 *   atimes_param : parameters for atimes().
 *   it : struct iter. following entries are used
 *        it->max = kend : max of iteration
 *        it->eps = eps  : criteria for |r^2|/|b^2|
 * OUTPUT
 *   returned value : 0 == success, otherwise (-1) == failed
 *   x [n] : solution
 *   it->niter : # of iteration
 *   it->res2  : |r^2| / |b^2|
 */
int
bicgstab (int n, const double *b, double *x,
	  void (*atimes) (int, const double *, double *, void *),
	  void *atimes_param,
	  struct iter *it)
{
#ifndef HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /* use Fortran BLAS routines */

  int i_1 = 1;
  double d_1 = 1.0;
  double d_m1 = -1.0;

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  int ret = -1;
  double eps2 = it->eps * it->eps;
  int itmax = it->max;

  double *r  = (double *)malloc (sizeof (double) * n);
  double *rs = (double *)malloc (sizeof (double) * n);
  double *p  = (double *)malloc (sizeof (double) * n);
  double *ap = (double *)malloc (sizeof (double) * n);
  double *s  = (double *)malloc (sizeof (double) * n);
  double *t  = (double *)malloc (sizeof (double) * n);
  CHECK_MALLOC (r,  "bicgstab");
  CHECK_MALLOC (rs, "bicgstab");
  CHECK_MALLOC (p,  "bicgstab");
  CHECK_MALLOC (ap, "bicgstab");
  CHECK_MALLOC (s,  "bicgstab");
  CHECK_MALLOC (t,  "bicgstab");

  double rsap; // (r*, A.p)
  double st;
  double t2;

  double rho, rho1;
  double delta;
  double gamma;
  double beta;

  double res2 = 0.0;

#ifdef HAVE_CBLAS_H
  /**
   * ATLAS version
   */

  double b2 = cblas_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  cblas_daxpy (n, -1.0, b, 1, r, 1); //         - b

  cblas_dcopy (n, r, 1, rs, 1); // r* = r
  cblas_dcopy (n, r, 1, p, 1);  // p  = r

  rho = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = cblas_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      cblas_dcopy (n, r, 1, s, 1);         // s = r ...
      cblas_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = cblas_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = cblas_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      cblas_dcopy (n, s, 1, r, 1);        // r = s ...
      cblas_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      cblas_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      cblas_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = cblas_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(cblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = cblas_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      cblas_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      cblas_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      cblas_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

#else // !HAVE_CBLAS_H
# ifdef HAVE_BLAS_H
  /**
   * BLAS version
   */

  double b2 = ddot_ (&n, b, &i_1, b, &i_1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);       // r = A.x ...
  daxpy_ (&n, &d_m1, b, &i_1, r, &i_1); //         - b

  dcopy_ (&n, r, &i_1, rs, &i_1); // r* = r
  dcopy_ (&n, r, &i_1, p, &i_1);  // p  = r

  rho = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = ddot_ (&n, rs, &i_1, ap, &i_1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      dcopy_ (&n, r, &i_1, s, &i_1);          // s = r ...
      daxpy_ (&n, &delta, ap, &i_1, s, &i_1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = ddot_ (&n, s, &i_1, t, &i_1); // st = (s, t)
      t2 = ddot_ (&n, t, &i_1, t, &i_1); // t2 = (t, t)
      gamma = - st / t2;

      dcopy_ (&n, s, &i_1, r, &i_1);         // r = s ...
      daxpy_ (&n, &gamma, t, &i_1, r, &i_1); //   + gamma t

      daxpy_ (&n, &delta, p, &i_1, x, &i_1); // x = x + delta p...
      daxpy_ (&n, &gamma, s, &i_1, x, &i_1); //       + gamma s

      res2 = ddot_ (&n, r, &i_1, r, &i_1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(blas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}
      if (res2 > 1.0e20)
	{
	  // already too big residual
	  break;
	}

      rho1 = ddot_ (&n, rs, &i_1, r, &i_1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      daxpy_ (&n, &gamma, ap, &i_1, p, &i_1); // p = p + gamma A.p
      dscal_ (&n, &beta, p, &i_1);            // p = beta (p + gamma A.p)
      daxpy_ (&n, &d_1, r, &i_1, p, &i_1); // p = r + beta(p + gamma A.p)
    }

# else // !HAVE_BLAS_H
  /**
   * local BLAS version
   */

  double b2 = my_ddot (n, b, 1, b, 1); // (b,b)
  eps2 *= b2;

  atimes (n, x, r, atimes_param);    // r = A.x ...
  my_daxpy (n, -1.0, b, 1, r, 1); //         - b

  my_dcopy (n, r, 1, rs, 1); // r* = r
  my_dcopy (n, r, 1, p, 1);  // p = r

  rho = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)

  int i;
  for (i = 0; i < itmax; i ++)
    {
      atimes (n, p, ap, atimes_param); // ap = A.p
      rsap = my_ddot (n, rs, 1, ap, 1); // rsap = (r*, A.p)
      delta = - rho / rsap;

      my_dcopy (n, r, 1, s, 1);         // s = r ...
      my_daxpy (n, delta, ap, 1, s, 1); //   + delta A.p
      atimes (n, s, t, atimes_param); // t = A.s

      st = my_ddot (n, s, 1, t, 1); // st = (s, t)
      t2 = my_ddot (n, t, 1, t, 1); // t2 = (t, t)
      gamma = - st / t2;

      my_dcopy (n, s, 1, r, 1);        // r = s ...
      my_daxpy (n, gamma, t, 1, r, 1); //   + gamma t

      my_daxpy (n, delta, p, 1, x, 1); // x = x + delta p...
      my_daxpy (n, gamma, s, 1, x, 1); //       + gamma s

      res2 = my_ddot (n, r, 1, r, 1);
      if (it->debug == 2)
	{
	  fprintf (it->out,
		   "libiter-bicgstab(myblas) %d %e\n",
		   i, res2 / b2);
	}
      if (res2 <= eps2)
	{
	  ret = 0; // success
	  break;
	}

      rho1 = my_ddot (n, rs, 1, r, 1); // rho = (r*, r)
      beta = rho1 / rho * delta / gamma;
      rho = rho1;

      my_daxpy (n, gamma, ap, 1, p, 1); // p = p + gamma A.p
      my_dscal (n, beta, p, 1);         // p = beta (p + gamma A.p)
      my_daxpy (n, 1.0, r, 1, p, 1); // p = r + beta(p + gamma A.p)
    }

# endif // !HAVE_BLAS_H
#endif // !HAVE_CBLAS_H

  free (r);
  free (rs);
  free (p);
  free (ap);
  free (s);
  free (t);

  if (it->debug == 1)
    {
      fprintf (it->out, "libiter-bicgstab %d %e\n", i, res2 / b2);
    }

  it->niter = i;
  it->res2  = res2 / b2;
  return (ret);
}
void variationalInequality_FixedPointProjection(VariationalInequality* problem, double *x, double *w, int* info, SolverOptions* options)
{
  /* /\* int and double parameters *\/ */
  int* iparam = options->iparam;
  double* dparam = options->dparam;
  /* Number of contacts */
  int n = problem->size;
  /* Maximum number of iterations */
  int itermax = iparam[0];
  /* Tolerance */
  double tolerance = dparam[0];

  /*****  Fixed point iterations *****/
  int iter = 0; /* Current iteration number */
  double error = 1.; /* Current error */
  int hasNotConverged = 1;

  double * xtmp = (double *)malloc(n * sizeof(double));
  double * wtmp = (double *)malloc(n * sizeof(double));

  double rho = 0.0, rho_k =0.0;
  int isVariable = 0;

  if (dparam[3] > 0.0)
  {
    rho = dparam[3];
    if (verbose > 0)
    {
      printf("----------------------------------- VI - Fixed Point Projection (FPP) - Fixed stepsize with  rho = %14.7e \n", rho);
    }
  }
  else
  {
    /* Variable step in iterations*/
    isVariable = 1;
    rho = -dparam[3];
    if (verbose > 0)
    {
      printf("----------------------------------- VI - Fixed Point Projection (FPP) - Variable stepsize with starting rho = %14.7e \n", rho);
    }
  }


  /* Variable for Line_search */
  int success =0;
  double error_k;
  int ls_iter = 0;
  int ls_itermax = 10;
  double tau=dparam[4], tauinv=dparam[5], L= dparam[6], Lmin = dparam[7];
  DEBUG_PRINTF("tau=%g, tauinv=%g, L= %g, Lmin = %g",dparam[4], dparam[5],  dparam[6], dparam[7] ) ;
  double a1=0.0, a2=0.0;
  double * x_k = NULL;
  double * w_k = NULL;

  if (isVariable)
  {
    x_k = (double *)malloc(n * sizeof(double));
    w_k = (double *)malloc(n * sizeof(double));
  }

  //isVariable=0;
  if (!isVariable)
  {
    /*   double minusrho  = -1.0*rho; */
    while ((iter < itermax) && (hasNotConverged > 0))
    {
      ++iter;

      problem->F(problem,n,x,w);
      cblas_daxpy(n, -1.0, w , 1, x , 1) ;
      cblas_dcopy(n , x , 1 , xtmp, 1);
      problem->ProjectionOnX(problem,xtmp,x);

      /* **** Criterium convergence **** */
      variationalInequality_computeError(problem, x , w, tolerance, options, &error);

      if (options->callback)
      {
        options->callback->collectStatsIteration(options->callback->env, n,
                                        x, w,
                                        error, NULL);
      }

      if (verbose > 0)
      {
        printf("----------------------------------- VI - Fixed Point Projection (FPP) - Iteration %i rho = %14.7e \tError = %14.7e\n", iter, rho, error);
      }
      if (error < tolerance) hasNotConverged = 0;
      *info = hasNotConverged;
    }
  }
  else if (isVariable)
  {
    if (iparam[1]==0) /* Armijo rule with Khotbotov ratio (default)   */
    {
      DEBUG_PRINT("Variable step size method with Armijo rule with Khotbotov ratio (default) \n");
      while ((iter < itermax) && (hasNotConverged > 0))
      {
        ++iter;
        /* Store the error */
        error_k = error;

        /* x_k <-- x store the x at the beginning of the iteration */
        cblas_dcopy(n , x , 1 , x_k, 1);
        /* compute w_k =F(x_k) */
        problem->F(problem,n,x,w_k);

        ls_iter = 0 ;
        success =0;
        rho_k=rho / tau;

        while (!success && (ls_iter < ls_itermax))
        {
          /* if (iparam[3] && ls_iter !=0) rho_k = rho_k * tau * min(1.0,a2/(rho_k*a1)); */
          /* else */ rho_k = rho_k * tau ;

          /* x <- x_k  for the std approach*/
          if (iparam[2]==0) cblas_dcopy(n, x_k, 1, x , 1) ;

          /* x <- x - rho_k*  w_k */
          cblas_daxpy(n, -rho_k, w_k , 1, x , 1) ;

          /* xtmp <-  ProjectionOnX(x) */
          problem->ProjectionOnX(problem,x,xtmp);
          problem->F(problem,n,xtmp,w);

          DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("xtmp[%i]=%12.8e\t",i,xtmp[i]);
              printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);});
          /* velocitytmp <- velocity */
          /* cblas_dcopy(n, w, 1, wtmp , 1) ; */

          /* velocity <- velocity - velocity_k   */
          cblas_daxpy(n, -1.0, w_k , 1, w , 1) ;


          /* a1 =  ||w - w_k|| */
          a1 = cblas_dnrm2(n, w, 1);
          DEBUG_PRINTF("a1 = %12.8e\n", a1);

          /* xtmp <- x */
          cblas_dcopy(n, xtmp, 1,x , 1) ;

          /* xtmp <- x - x_k   */
          cblas_daxpy(n, -1.0, x_k , 1, xtmp , 1) ;

          /* a2 =  || x - x_k || */
          a2 = cblas_dnrm2(n, xtmp, 1) ;
          DEBUG_PRINTF("a2 = %12.8e\n", a2);

          DEBUG_PRINTF("test rho_k*a1 < L * a2 = %e < %e\n", rho_k*a1 , L * a2 ) ;
          success = (rho_k*a1 < L * a2)?1:0;

          /* printf("rho_k = %12.8e\t", rho_k); */
          /* printf("a1 = %12.8e\t", a1); */
          /* printf("a2 = %12.8e\t", a2); */
          /* printf("norm x = %12.8e\t",cblas_dnrm2(n, x, 1) ); */
          /* printf("success = %i\n", success); */

          ls_iter++;
        }

        /* problem->F(problem,x,w); */
        DEBUG_EXPR_WE( for (int i =0; i< 5 ; i++) { printf("x[%i]=%12.8e\t",i,x[i]);
            printf("w[%i]=F[%i]=%12.8e\n",i,i,w[i]);});

        /* **** Criterium convergence **** */
        variationalInequality_computeError(problem, x , w, tolerance, options, &error);

        DEBUG_EXPR_WE(
          if ((error < error_k))
          {
            printf("(error < error_k) is satisfied\n");
          };
          );
Exemple #29
0
void lanczos(double *F, double *Es, double *L, int n_eigs, int n_patch,
             int LANCZOS_ITR)
{
    double *b;
    double b_norm;

    double *z;
    double *alpha, *beta;
    double *q;
    int i;
    
    double *eigvec; // eigenvectors 

    // generate random b with norm 1.
    srand((unsigned int)time(NULL));
    b = (double *)malloc(n_patch * sizeof(double));
    for (i = 0; i < n_patch; i++)
        b[i] = rand();
    b_norm = norm2(b, n_patch);
    for (i = 0; i < n_patch; i++)
        b[i] /= b_norm;

    alpha = (double *)malloc( (LANCZOS_ITR + 1) * sizeof(double) );
    beta = (double *)malloc( (LANCZOS_ITR + 1) * sizeof(double) );
    beta[0] = 0.0; // beta_0 <- 0
    z = (double *)malloc( n_patch * sizeof(double));
    q = (double *)malloc( n_patch * (LANCZOS_ITR + 2) * sizeof(double) ); 
    memset(&q[0], 0, n_patch * sizeof(double)); // q_0 <- 0
    memcpy(&q[n_patch], b, n_patch * sizeof(double)); // q_1 <- b

    for (i = 1; i <= LANCZOS_ITR; i++) {
        // z = L * Q(:, i)
        cblas_dsymv(CblasColMajor, CblasLower, n_patch, 1.0, L,
                    n_patch, &q[i * n_patch], 1, 0.0, z, 1);
        // alpha(i) = Q(:, i)' * z;
        alpha[i] = cblas_ddot(n_patch, &q[i * n_patch], 1, z, 1);
        // z = z - alpha(i) * Q(:, i)
        cblas_daxpy(n_patch, -alpha[i], &q[i * n_patch], 1, z, 1);
        // z = z - beta(i - 1) * Q(:, i - 1);
        cblas_daxpy(n_patch, -beta[i - 1], &q[(i - 1) * n_patch], 1, z, 1);

        // beta(i) = norm(z, 2);
        beta[i] = cblas_dnrm2(n_patch, z, 1);
        // Q(:, i + 1) = z / beta(i);
        divide_copy(&q[(i + 1) * n_patch], z, n_patch, beta[i]);
    }

    // compute approximate eigensystem
    eigvec = (double *)malloc(LANCZOS_ITR * LANCZOS_ITR * sizeof(double));
    LAPACKE_dstedc(LAPACK_COL_MAJOR, 'I', LANCZOS_ITR, &alpha[1], &beta[1],
                   eigvec, LANCZOS_ITR); 
    // copy specified number of eigenvalues
    memcpy(Es, &alpha[1], n_eigs * sizeof(double));

    // V = Q(:, 1:k) * U
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, n_patch,
                LANCZOS_ITR, LANCZOS_ITR, 1.0, &q[n_patch], n_patch, eigvec,
                LANCZOS_ITR, 0.0, L, n_patch);
    // copy the corresponding eigenvectors
    memcpy(F, L, n_patch * n_eigs * sizeof(double));

    free(b);
    free(z);
    free(alpha);
    free(beta);
    free(q);
    free(eigvec);
}
void conjugate_gradient_sparse(cs *A, double *b, double* x, int n, double itol)
{  
    int i,j;
    int iter;
    double rho,rho1,alpha,beta,omega;
     
    double r[n];
    double z[n];
    double q[n], temp_q[n];
    double p[n], temp_p[n];
    double res[n];
    double precond[n];  //Preconditioner
     
    memset(precond, 0, n*sizeof(double));
    memset(r, 0, n*sizeof(double));
    memset(z, 0, n*sizeof(double));
    memset(q, 0, n*sizeof(double));
    memset(temp_q, 0, n*sizeof(double));
    memset(p, 0, n*sizeof(double));
    memset(temp_p, 0, n*sizeof(double));
 
    /* Preconditioner */
    double max;
    int pp;
    for(j = 0; j < n; ++j){
        for(pp = A->p[j], max = fabs(A->x[pp]); pp < A->p[j+1]; pp++)
            if(fabs(A->x[pp]) > max)                  //vriskei to diagonio stoixeio
                max = fabs(A->x[pp]);
        precond[j] = 1/max;    
    }  
 
    cblas_dcopy (n, x, 1, res, 1);
 
    //r=b-Ax
    cblas_dcopy (n, b, 1, r, 1);
    memset(p, 0, n*sizeof(double));
    cs_gaxpy (A, x, p);
    for(i=0;i<n;i++){
        r[i]=r[i]-p[i];
     
    }
     
    double r_norm = cblas_dnrm2 (n, r, 1);
    double b_norm = cblas_dnrm2 (n, b, 1);
    if(!b_norm)
        b_norm = 1;
 
    iter = 0;  
     
    while( r_norm/b_norm > itol && iter < n )
    {
        iter++;
 
        cblas_dcopy (n, r, 1, z, 1);                //gia na min allaksei o r
         
        for(i=0;i<n;i++){
            z[i]=precond[i]*z[i];
     
        }
 
        rho = cblas_ddot (n, z, 1, r, 1);
        if (fpclassify(fabs(rho)) == FP_ZERO){
            printf("RHO aborting CG due to EPS...\n");
            exit(42);
        }
 
        if (iter == 1){
            cblas_dcopy (n, z, 1, p, 1);
        }
        else{      
            beta = rho/rho1;
     
            //p = z + beta*p;
            cblas_dscal (n, beta, p, 1);    //rescale
            cblas_daxpy (n, 1, z, 1, p, 1); //p = 1*z + p
             
        }      
        rho1 = rho;
         
        //q = Ap
        memset(q, 0, n*sizeof(double));
        cs_gaxpy (A, p, q);
 
        omega = cblas_ddot (n, p, 1, q, 1);
        if (fpclassify(fabs(omega)) == FP_ZERO){
            printf("OMEGA aborting CG due to EPS...\n");
            exit(42);
        }
 
        alpha = rho/omega; 
 
        //x = x + aplha*p;
        cblas_dcopy (n, p, 1, temp_p, 1);
        cblas_dscal (n, alpha, temp_p, 1);//rescale by alpha
        cblas_daxpy (n, 1, temp_p, 1, res, 1);// sum x = 1*x + temp_p
 
        //r = r - aplha*q;
        cblas_dcopy (n, q, 1, temp_q, 1);
        cblas_dscal (n, -alpha, temp_q, 1);//rescale by alpha
        cblas_daxpy (n, 1, temp_q, 1, r, 1);// sum r = 1*r - temp_p
 
        //next step
        r_norm = cblas_dnrm2 (n, r, 1);
    }
    cblas_dcopy (n, res, 1, x, 1);
 
}