/* *check M z + q =0 * * */ double LinearSystem_computeError(LinearSystemProblem* problem, double *z) { double * pM = problem->M->matrix0; double * pQ = problem->q; double error = 10; int n = problem->size; double * res = (double*)malloc(n * sizeof(double)); memcpy(res, pQ, n * sizeof(double)); cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, 1.0, pM, n, z, 1, 1.0, res, 1); error = cblas_dnrm2(n, res, 1); free(res); return error; }
///////////////Problem/////////////// Problem::Problem(long n,std::string structure="diagonal", std::string type = "nice") { m_A = Tools::initArray(n*n); m_x = Tools::initArray(n); m_b = Tools::initArray(n); m_n = n; double (*f_pntr)(long); if ( type.compare("nice") ) { f_pntr = randomNumber; } else if (type.compare("random")) { f_pntr = niceNumber; } if ( !structure.compare("diagonal") ) { for (long ii = 0; ii < m_n; ii++ ) { m_A[ii + m_n*ii] = f_pntr(ii); m_x[ii] = f_pntr(ii); } } else if (!structure.compare("dense")) { for( long ii = 0; ii < m_n*m_n; ii++) { m_A[ii] = f_pntr(ii); } for( long ii = 0; ii < m_n; ii++) { m_x[ii] = f_pntr(ii); } } int inc = 1; cblas_dgemv(CblasColMajor,CblasNoTrans,m_n,m_n,1.0,m_A,m_n,m_x,inc,0.0,m_b, inc); }
JNIEXPORT void JNICALL Java_uncomplicate_neanderthal_CBLAS_dgemv (JNIEnv * env, jclass clazz, jint Order, jint TransA, jint M, jint N, jdouble alpha, jobject A, jint lda, jobject X, jint incX, jdouble beta, jobject Y, jint incY) { double *cA = (double *) (*env)->GetDirectBufferAddress(env, A); double *cX = (double *) (*env)->GetDirectBufferAddress(env, X); double *cY = (double *) (*env)->GetDirectBufferAddress(env, Y); cblas_dgemv(Order, TransA, M, N, alpha, cA, lda, cX, incX, beta, cY, incY); };
ECLBLAS_CALL void dpotf2(bool & __isAllResult, size32_t & __lenResult, void * & __result, uint8_t tri, uint32_t r, bool isAllA, size32_t lenA, const void * A, bool clear) { unsigned int cells = r*r; __isAllResult = false; __lenResult = cells * sizeof(double); double *new_a = (double*) rtlMalloc(__lenResult); memcpy(new_a, A, __lenResult); double ajj; // x and y refer to the embedded vectors for the multiply, not an axis unsigned int diag, a_pos, x_pos, y_pos; unsigned int col_step = r; // between columns unsigned int row_step = 1; // between rows unsigned int x_step = (tri==UPPER_TRIANGLE) ? row_step : col_step; unsigned int y_step = (tri==UPPER_TRIANGLE) ? col_step : row_step; for (unsigned int j=0; j<r; j++) { diag = (j * r) + j; // diagonal x_pos = j * ((tri==UPPER_TRIANGLE) ? col_step : row_step); a_pos = (j+1) * ((tri==UPPER_TRIANGLE) ? col_step : row_step); y_pos = diag + y_step; // ddot.value <- x'*y ajj = new_a[diag] - cblas_ddot(j, (new_a+x_pos), x_step, (new_a+x_pos), x_step); //if ajj is 0, negative or NaN, then error if (ajj <= 0.0) { rtlFree(new_a); rtlFail(0, "Not a positive definite matrix"); } ajj = sqrt(ajj); new_a[diag] = ajj; if ( j < r-1) { // y <- alpha*op(A)*x + beta*y cblas_dgemv(CblasColMajor, (tri==UPPER_TRIANGLE) ? CblasTrans : CblasNoTrans, (tri==UPPER_TRIANGLE) ? j : r-1-j, // M (tri==UPPER_TRIANGLE) ? r-1-j : j, // N -1.0, // alpha (new_a+a_pos), r, //A (new_a+x_pos), x_step, //X 1.0, (new_a+y_pos), y_step); // beta and Y // x <- alpha * x cblas_dscal(r-1-j, 1.0/ajj, (new_a+y_pos), y_step); } // clear lower or upper part if clear flag set for(unsigned int k=1; clear && k<r-j; k++) new_a[(k*x_step)+diag] = 0.0; } __result = (void*) new_a; }
void method5(int M,int N,int T,double *Xin,double *Xout,double *Kern) { omp_set_num_threads(10); #pragma omp parallel { int tid = omp_get_thread_num(); int nthreads=omp_get_num_threads(); if (tid==0) printf("Using %d threads\n",omp_get_num_threads()); int i1=((N-T)*tid)/nthreads; int i2=((N-T)*(tid+1))/nthreads; for (int n=i1; n<i2; n++) { cblas_dgemv(CblasColMajor, CblasNoTrans, M, T, 1.0, &Xin[M*n], M, Kern, 1, 0.0, &Xout[M*n], 1); } } }
void VectorView::set_to_product(const MatrixView& m, const VectorView& v, const bool transpose) { CBLAS_TRANSPOSE tr; if (transpose){ tr = CblasTrans; assert(m.cols() == length()); assert(m.rows() == v.length()); } else { tr = CblasNoTrans; assert(m.cols() == v.length()); assert(m.rows() == length()); } cblas_dgemv(CblasColMajor, tr, m.rows(), m.cols(), 1.0, m.data(), m.stride(), v.data(), 1, 0.0, data_, 1); }
/* * Class: com_intel_analytics_bigdl_mkl_MKL * Method: vdgemv * Signature: (SSIIID[DII[DIID[DII)V */ JNIEXPORT void JNICALL Java_com_intel_analytics_bigdl_mkl_MKL_vdgemv (JNIEnv * env, jclass cls, jchar trans, jint m, jint n, jdouble alpha, jdoubleArray a, jint aOffset, jint lda, jdoubleArray x, jint xOffset, jint incx, jdouble beta, jdoubleArray y, jint yOffset, jint incy) { jdouble * jni_a = (*env)->GetPrimitiveArrayCritical(env, a, JNI_FALSE); jdouble * jni_x = (*env)->GetPrimitiveArrayCritical(env, x, JNI_FALSE); jdouble * jni_y = (*env)->GetPrimitiveArrayCritical(env, y, JNI_FALSE); int jni_trans; if(trans == 't' || trans == 'T') jni_trans = CblasTrans; else jni_trans = CblasNoTrans; cblas_dgemv(CblasColMajor, jni_trans, m, n, alpha, jni_a + aOffset, lda, jni_x + xOffset, incx, beta, jni_y + yOffset, incy); (*env)->ReleasePrimitiveArrayCritical(env, a, jni_a, 0); (*env)->ReleasePrimitiveArrayCritical(env, x, jni_x, 0); (*env)->ReleasePrimitiveArrayCritical(env, y, jni_y, 0); }
double * solve(size_t nfield, double * perm, double h, double lb, double ub) { double * op = buildOP(nfield,perm,h); double * rhs = buildRHS(nfield-1,lb+h,ub); double * inv = calloc_double((nfield-1) * (nfield-1)); pinv(nfield-1,nfield-1,nfield-1,op,inv,0.0); double * sol = calloc_double(nfield); cblas_dgemv(CblasColMajor,CblasNoTrans,nfield-1,nfield-1,1.0,inv,nfield-1, rhs,1,0.0,sol+1,1); free(op); op = NULL; free(rhs); rhs = NULL; free(inv); inv = NULL; return sol; }
void test_dgemv_trans2() { const size_t m=35, n=45; double a[m*n]; double x[m]; double y[n]; double z[n]; size_t i; for(i=0; i<m*n; i++) a[i]=i; for(i=0; i<n; i++) x[i]=i+m*n; for(i=0; i<m; i++) y[i]=z[i]=i*i; my_dgemv(CblasRowMajor,CblasTrans,m,n,2.0,a,n,x,1,2.0,y,1); cblas_dgemv(CblasRowMajor,CblasTrans,m,n,2.0,a,n,x,1,2.0,z,1); for(i=0; i<m; i++){ assert(y[i]==z[i]); } }
int gsl_blas_dgemv(CBLAS_TRANSPOSE_t TransA, double alpha, const gsl_matrix * A, const gsl_vector * X, double beta, gsl_vector * Y) { const size_t M = A->size1; const size_t N = A->size2; if ((TransA == CblasNoTrans && N == X->size && M == Y->size) || (TransA == CblasTrans && M == X->size && N == Y->size)) { cblas_dgemv(CblasRowMajor, TransA, INT(M), INT(N), alpha, A->data, INT(A->tda), X->data, INT(X->stride), beta, Y->data, INT(Y->stride)); return GSL_SUCCESS; } else { GSL_ERROR("invalid length", GSL_EBADLEN); } }
void test_dgemv() { const size_t m=3, n=4; double a[3*4]={ 1,2,3,4, 5,6,7,8, 9,10,11,12 }; double x[4]={2,1,4,3}; double y[3]={6,5,7}; double z[3]={6,5,7}; size_t i; my_dgemv(CblasRowMajor,CblasNoTrans,m,n,2.0,a,n,x,1,2.0,y,1); cblas_dgemv(CblasRowMajor,CblasNoTrans,m,n,2.0,a,n,x,1,2.0,z,1); for(i=0; i<m; i++){ assert(y[i]==z[i]); } }
void ReducedLinearForceModel::GetInternalForce(double * q, double * internalForces) { CBLAS_ORDER order= CblasColMajor; CBLAS_TRANSPOSE trans= CblasNoTrans; int m = r; int n = r; double alpha = 1; double * a = stiffnessMatrix; int lda = r; double * x = q; int incx = 1; double beta = 0; double * y = internalForces; int incy = 1; cblas_dgemv(order, trans, m, n, alpha, a, lda, x, incx, beta, y, incy); }
int main(int iArgCnt, char* sArrArgs[]) { int iIterationNo = 0; double dNormOfResult = 0; double dTime0 = 0, dTime1 = 0, dTimeDiff = 0, dMinTimeDiff = DBL_MAX, dMaxTimeDiff = 0; parseInputs(iArgCnt, sArrArgs); MPI_Init(&iArgCnt, &sArrArgs); MPI_Comm_size(MPI_COMM_WORLD, &GiProcessCnt); MPI_Comm_rank(MPI_COMM_WORLD, &GiProcessRank); initData(); for(iIterationNo = 0; iIterationNo < GiIterationCnt; iIterationNo++) { MPI_Barrier(MPI_COMM_WORLD); dTime0 = MPI_Wtime(); cblas_dgemv(CblasRowMajor, CblasNoTrans, GiRowCntForOneProc, GiVectorLength, 1.0, GdArrSubMatrix, GiVectorLength, GdArrVector, 1, 0.0, GdArrSubResult, 1); MPI_Barrier(MPI_COMM_WORLD); MPI_Gather(GdArrSubResult, GiRowCntForOneProc, MPI_DOUBLE, GdArrTotalResult, GiRowCntForOneProc, MPI_DOUBLE, 0, MPI_COMM_WORLD); dTime1 = MPI_Wtime(); dTimeDiff = (dTime1 - dTime0); if(dTimeDiff > dMaxTimeDiff) dMaxTimeDiff = dTimeDiff; if(dTimeDiff < dMinTimeDiff) dMinTimeDiff = dTimeDiff; } if(GiProcessRank == 0) { dNormOfResult = cblas_dnrm2(GiVectorLength, GdArrTotalResult, 1); printf("Result=%f\nMin Time=%f uSec\nMax Time=%f uSec\n", dNormOfResult, (1.e6 * dMinTimeDiff), (1.e6 * dMaxTimeDiff)); } MPI_Finalize(); return 0; }
/** In place prediction of the next state mean and covariances */ void predict_forward(kf_t *kf) { //TODO take advantage of sparsity in this function double x[kf->state_dim]; memcpy(x, kf->state_mean, kf->state_dim * sizeof(double)); //TODO make more efficient via the structure of the transition matrix cblas_dgemv(CblasRowMajor, CblasNoTrans, // CBLAS_ORDER, CBLAS_TRANSPOSE kf->state_dim, kf->state_dim, // int M, int N, 1, (double *) kf->transition_mtx, kf->state_dim, // double 1, double *A, int lda x, 1, // double *X, int incX 0, kf->state_mean, 1); // double beta, double *Y, int incY // VEC_PRINTF((double *) state_mean, kf->state_dim); double state_cov[kf->state_dim * kf->state_dim]; reconstruct_udu(kf->state_dim, kf->state_cov_U, kf->state_cov_D, state_cov); // MAT_PRINTF((double *) state_cov, kf->state_dim, kf->state_dim); //TODO make more efficient via the structure of the transition matrix double FC[kf->state_dim * kf->state_dim]; cblas_dsymm(CblasRowMajor, CblasRight, CblasUpper, //CBLAS_ORDER, CBLAS_SIDE, CBLAS_UPLO kf->state_dim, kf->state_dim, // int M, int N 1, state_cov, kf->state_dim, // double alpha, double *A, int lda kf->transition_mtx, kf->state_dim, // double *B, int ldb 0, FC, kf->state_dim); // double beta, double *C, int ldc // MAT_PRINTF((double *) FC, kf->state_dim, kf->state_dim); //TODO make more efficient via the structure of the transition matrix double FCF[kf->state_dim * kf->state_dim]; memcpy(FCF, kf->transition_cov, kf->state_dim * kf->state_dim * sizeof(double)); cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, // CBLAS_ORDER, CBLAS_TRANSPOSE transA, cBLAS_TRANSPOSE transB kf->state_dim, kf->state_dim, kf->state_dim, // int M, int N, int K 1, FC, kf->state_dim, // double alpha, double *A, int lda kf->transition_mtx, kf->state_dim, //double *B, int ldb 1, FCF, kf->state_dim); //beta, double *C, int ldc // MAT_PRINTF((double *) FCF, kf->state_dim, kf->state_dim); udu(kf->state_dim, FCF, kf->state_cov_U, kf->state_cov_D); // MAT_PRINTF((double *) state_cov_U, kf->state_dim, kf->state_dim); // VEC_PRINTF((double *) state_cov_D, kf->state_dim); }
static void FB_compute_H_mlcp(void* data_opaque, double* z, double* w, double* workV1, double* workV2, NumericsMatrix* H) { printf("MLCP FB_compute_H_mlcp not implemented yet"); exit(1); #if 0 MixedLinearComplementarityProblem* data = (MixedLinearComplementarityProblem *)data_opaque; unsigned int n = data->size; assert(data->M); assert(data->M->matrix0); double* M = data->M->matrix0; double normi; // workV1 = "z" in Facchibei--Pang p. 808 // "z_i" = 1 if z_i = w_i = 0.0 // M^T.workV1 --> workV2 cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, M, n , workV1, 1, 0.0, workV2, 1); for (unsigned int i = 0; i < n; ++i) { if (workV1[i] != 0.0) // i in beta { normi = sqrt(workV1[i] * workV1[i] + workV2[i] * workV2[i]); for (unsigned int j = 0; j < n; j++) { H[j * n + i] = (workV2[i] / normi - 1.0) * M[j * n + i]; } H[i * n + i] += (workV1[i] / normi - 1.0); } else // i not in beta { normi = sqrt(z[i] * z[i] + w[i] * w[i]); for (unsigned int j = 0; j < n; j++) { H[j * n + i] = (w[i] / normi - 1.0) * M[j * n + i]; } H[i * n + i] += (z[i] / normi - 1.0); } } #endif }
void HMM::smoother(int iseq, double* c, double* beta, double* Z) { int si = o->sind[iseq]; // start index of the current sequence int sl = o->slen[iseq]; // length of the current sequence uint32_t* y_ = o->y + si; // pointer to the current sequence double one = 1; // initialize: fill last frame of beta with ones vfill(&one, beta + (sl-1)*hs, 1, hs); for(int t = sl - 2; t > -1; t--) { double* out = beta + t*hs; vmul(beta + (t+1)*hs, 1, g + y_[t+1], os, Z, 1, hs); // using out as a temp vector because in cblas_dgemv, x cannot be y cblas_dgemv(CblasRowMajor, CblasNoTrans, hs, hs, 1, Q, hs, Z, 1, 0, out, 1); vsdiv(out, 1, c + t+1, out, 1, hs); } }
void StVKReducedHessianTensor::ContractWithVector(int r, double * Hq, double * q, double * A) { // computes A = Hq : q int quadraticSize = StVKReducedInternalForces::GetQuadraticSize(r); // multiply Hq and q cblas_dgemv(CblasColMajor, CblasTrans, r, quadraticSize, 1.0, Hq, r, q, 1, 0.0, A, 1); for(int j=r-1; j>=0; j--) for(int i=r-1; i>=j; i--) { int lowerTrianglePos = j * r - (j-1) * j / 2 + (i-j); A[ELT(r,i,j)] = A[lowerTrianglePos]; A[ELT(r,j,i)] = A[lowerTrianglePos]; } }
/* Numerics Matrix wrapper for y <- alpha trans(A) x + beta y */ void NM_tgemv(const double alpha, NumericsMatrix* A, const double *x, const double beta, double *y) { switch (A->storageType) { case NM_DENSE: { cblas_dgemv(CblasColMajor, CblasTrans, A->size0, A->size1, alpha, A->matrix0, A->size0, x, 1, beta, y, 1); break; } case NM_SPARSE_BLOCK: case NM_SPARSE: { CHECK_RETURN(cs_aaxpy(alpha, NM_csc_trans(A), x, beta, y)); break; } default: { assert(0 && "NM_tgemv unknown storageType"); } } }
// computes means of the rows of A, subtracts them from A, and returns them in meanVec on the root process // assumes memory has already been allocated for meanVec void computeAndSubtractRowMeans(double *localRowChunk, double *meanVec, distMatrixInfo *matInfo) { int mpi_rank = matInfo->mpi_rank; int numcols = matInfo->numcols; int localrows = matInfo->localrows; int * rowcounts = matInfo->rowcounts; int * rowoffsets = matInfo->rowoffsets; MPI_Comm *comm = matInfo->comm; double *onesVec = (double *) malloc( numcols * sizeof(double)); double *localMeanVec = (double *) malloc( localrows * sizeof(double)); for(int idx = 0; idx < numcols; idx = idx + 1) { onesVec[idx]=1; } cblas_dgemv(CblasRowMajor, CblasNoTrans, localrows, numcols, 1.0/((double)numcols), localRowChunk, numcols, onesVec, 1, 0, localMeanVec, 1); cblas_dger(CblasRowMajor, localrows, numcols, -1.0, localMeanVec, 1, onesVec, 1, localRowChunk, numcols); if (mpi_rank != 0) { MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0, *comm); } else { MPI_Gatherv(localMeanVec, localrows, MPI_DOUBLE, meanVec, rowcounts, rowoffsets, MPI_DOUBLE, 0, *comm); } free(onesVec); free(localMeanVec); }
AA_API void aa_la_xlsnp( size_t m, size_t n, const double *A, const double *A_star, const double *x, const double *yp, double *y ) { aa_la_mvmul(n,m,A_star,x,y); double *B = (double*)aa_mem_region_local_alloc( sizeof(double) * n*n ); // B = A^* A cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, (int)n, (int)n, (int)m, 1, A_star, (int)n, A, (int)m, 0, B, (int)n ); // B = A^* A - I for( size_t i = 0; i < n; i ++ ) AA_MATREF(B,n,i,i) -= 1; // y = y + -B yp cblas_dgemv( CblasColMajor, CblasNoTrans, (int)n, (int)n, -1.0, B, (int)n, yp, 1, 1, y, 1 ); aa_mem_region_local_pop( B ); }
void plotMerit(double *z, double psi_k, double descentCondition) { int incx = 1, incy = 1; double q_0, q_tk, qp_tk, merit_k; /* double tmin = 1e-12; */ double tk = 1, aux; double m1 = 1e-4; double Nstep = 0; int i = 0; FILE *fp; (*sFphi)(sN, z, sphi_z, 0); aux = cblas_dnrm2(sN, sphi_z, 1); /* Computes merit function */ aux = 0.5 * aux * aux; printf("plot psi_z %e\n", aux); if (!sPlotMerit) return; if (sPlotMerit) { /* sPlotMerit=0;*/ strcpy(fileName, "outputLS"); (*sFphi)(sN, z, sphi_z, 0); q_0 = cblas_dnrm2(sN, sphi_z , incx); q_0 = 0.5 * q_0 * q_0; fp = fopen(fileName, "w"); /* sPlotMerit=0;*/ tk = 5e-7; aux = -tk; Nstep = 1e4; for (i = 0; i < 2 * Nstep; i++) { cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(sN , aux , sdir_descent , incx , sz2 , incy); (*sFphi)(sN, sz2, sphi_z, 0); q_tk = cblas_dnrm2(sN, sphi_z , incx); q_tk = 0.5 * q_tk * q_tk; (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx); qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1); merit_k = psi_k + m1 * aux * descentCondition; fprintf(fp, "%e %.16e %.16e %e\n", aux, q_tk, merit_k, qp_tk); if (i == Nstep - 1) aux = 0; else aux += tk / Nstep; } fclose(fp); } }
int nonSmoothNewtonNeigh(int n, double* z, NewtonFunctionPtr* phi, NewtonFunctionPtr* jacobianPhi, int* iparam, double* dparam) { int itermax = iparam[0]; // maximum number of iterations allowed int iterMaxWithSameZ = itermax / 4; int niter = 0; // current iteration number double tolerance = dparam[0]; /* double coef; */ sFphi = phi; sFjacobianPhi = jacobianPhi; // verbose=1; if (verbose > 0) { printf(" ============= Starting of Newton process =============\n"); printf(" - tolerance: %14.7e\n - maximum number of iterations: %i\n", tolerance, itermax); } int incx = 1; /* int n2 = n*n; */ int infoDGESV; /** merit function and its jacobian */ double psi_z; /** The algorithm is alg 4.1 of the paper of Kanzow and Kleinmichel, "A new class of semismooth Newton-type methods for nonlinear complementarity problems", in Computational Optimization and Applications, 11, 227-251 (1998). We try to keep the same notations */ double rho = 1e-8; double descentCondition, criterion, norm_jacobian_psi_z, normPhi_z; double p = 2.1; double terminationCriterion = 1; double norm; int findNewZ, i, j, NbLookingForANewZ; /* int naux=0; */ double aux = 0; /* double aux1=0; */ int ii; int resls = 1; /* char c; */ /* double * oldz; */ /* oldz=(double*)malloc(n*sizeof(double));*/ NbLookingForANewZ = 0; /** Iterations ... */ while ((niter < itermax) && (terminationCriterion > tolerance)) { scmp++; ++niter; /** Computes phi and its jacobian */ if (sZsol) { for (ii = 0; ii < sN; ii++) szzaux[ii] = sZsol[ii] - z[ii]; printf("dist zzsol %.32e.\n", cblas_dnrm2(n, szzaux, 1)); } (*sFphi)(n, z, sphi_z, 0); (*sFjacobianPhi)(n, z, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, n, n, 1.0, sjacobianPhi_z, n, sphi_z, incx, 0.0, sgrad_psi_z, incx); norm_jacobian_psi_z = cblas_dnrm2(n, sgrad_psi_z, 1); /* Computes norm2(phi) */ normPhi_z = cblas_dnrm2(n, sphi_z, 1); /* Computes merit function */ psi_z = 0.5 * normPhi_z * normPhi_z; if (normPhi_z < tolerance) { /*it is the solution*/ terminationCriterion = tolerance / 2.0; break; } if (verbose > 0) { printf("Non Smooth Newton, iteration number %i, norm grad psi= %14.7e , psi = %14.7e, normPhi = %e .\n", niter, norm_jacobian_psi_z, psi_z, normPhi_z); printf(" -----------------------------------------------------------------------\n"); } NbLookingForANewZ++; if (niter > 2) { if (10 * norm_jacobian_psi_z < tolerance || !resls || NbLookingForANewZ > iterMaxWithSameZ) { NbLookingForANewZ = 0; resls = 1; /* if (NbLookingForANewZ % 10 ==1 && 0){ printf("Try NonMonotomnelineSearch\n"); cblas_dcopy(n,sgrad_psi_z,1,sdir_descent,1); cblas_dscal( n , -1.0 ,sdir_descent,incx); NonMonotomnelineSearch( z, phi, 10); continue; } */ /* FOR DEBUG ONLY*/ if (sZsol) { printf("begin plot prev dir\n"); plotMerit(z, 0, 0); printf("end\n"); /* gets(&c);*/ (*sFphi)(n, sZsol, szaux, 0); printf("value psi(zsol)=%e\n", cblas_dnrm2(n, szaux, 1)); cblas_dcopy(n, sZsol, incx, szaux, incx); cblas_daxpy(n , -1 , z , 1 , szaux , 1); printf("dist to sol %e \n", cblas_dnrm2(n, szaux, 1)); for (ii = 0; ii < n; ii++) sdir_descent[ii] = sZsol[ii] - z[ii]; aux = norm; norm = 1; printf("begin plot zzsol dir\n"); plotMerit(z, 0, 0); printf("end\n"); /* gets(&c);*/ norm = aux; } printf("looking for a new Z...\n"); /*may be a local minimal*/ /*find a gradiant going out of this cul-de-sac.*/ norm = n / 2; findNewZ = 0; for (j = 0; j < 20; j++) { for (i = 0; i < n; i++) { if (sZsol) { /* FOR DEBUG ONLY*/ (*sFphi)(n, sZsol, sphi_zaux, 0); norm = cblas_dnrm2(n, sphi_zaux, 1); printf("Norm of the sol %e.\n", norm); for (ii = 0; ii < n; ii++) sdir_descent[ii] = sZsol[ii] - z[ii]; norm = 1; } else { for (ii = 0; ii < n; ii++) { sdir_descent[ii] = 1.0 * rand(); } cblas_dscal(n, 1 / cblas_dnrm2(n, sdir_descent, 1), sdir_descent, incx); cblas_dscal(n, norm, sdir_descent, incx); } cblas_dcopy(n, z, incx, szaux, incx); // cblas_dscal(n,0.0,zaux,incx); /* zaux = z + dir */ cblas_daxpy(n , norm , sdir_descent , 1 , szaux , 1); /* Computes the jacobian of the merit function, jacobian_psi_zaux = transpose(jacobianPhi_zaux).phi_zaux */ (*sFphi)(n, szaux, sphi_zaux, 0); (*sFjacobianPhi)(n, szaux, sjacobianPhi_zaux, 1); /* FOR DEBUG ONLY*/ if (sZsol) { aux = cblas_dnrm2(n, sphi_zaux, 1); printf("Norm of the sol is now %e.\n", aux); for (ii = 0; ii < n; ii++) printf("zsol %e zaux %e \n", sZsol[ii], szaux[ii]); } cblas_dgemv(CblasColMajor, CblasTrans, n, n, 1.0, sjacobianPhi_zaux, n, sphi_zaux, incx, 0.0, sgrad_psi_zaux, incx); cblas_dcopy(n, szaux, 1, szzaux, 1); cblas_daxpy(n , -1 , z , incx , szzaux , incx); /*zzaux must be a descente direction.*/ /*ie jacobian_psi_zaux.zzaux <0 printf("jacobian_psi_zaux : \n");*/ /*cblas_dcopy(n,sdir,incx,sdir_descent,incx); plotMerit(z, phi);*/ aux = cblas_ddot(n, sgrad_psi_zaux, 1, szzaux, 1); /* aux1 = cblas_dnrm2(n,szzaux,1); aux1 = cblas_dnrm2(n,sgrad_psi_zaux,1);*/ aux = aux / (cblas_dnrm2(n, szzaux, 1) * cblas_dnrm2(n, sgrad_psi_zaux, 1)); /* printf("aux: %e\n",aux);*/ if (aux < 0.1 * (j + 1)) { //zaux is the new point. findNewZ = 1; cblas_dcopy(n, szaux, incx, z, incx); break; } } if (findNewZ) break; if (j == 10) { norm = n / 2; } else if (j > 10) norm = -2 * norm; else norm = -norm / 2.0; } if (! findNewZ) { printf("failed to find a new z\n"); /* exit(1);*/ continue; } else continue; } } /* Stops if the termination criterion is satisfied */ terminationCriterion = norm_jacobian_psi_z; /* if(terminationCriterion < tolerance){ break; }*/ /* Search direction calculation Find a solution dk of jacobianPhiMatrix.d = -phiVector. dk is saved in phiVector. */ cblas_dscal(n , -1.0 , sphi_z, incx); DGESV(n, 1, sjacobianPhi_z, n, sipiv, sphi_z, n, &infoDGESV); if (infoDGESV) { printf("DGEV error %d.\n", infoDGESV); } cblas_dcopy(n, sphi_z, 1, sdir_descent, 1); criterion = cblas_dnrm2(n, sdir_descent, 1); /* printf("norm dir descent %e\n",criterion);*/ /*printf("begin plot descent dir\n"); plotMerit(z, phi); printf("end\n"); gets(&c);*/ /*printf("begin plot zzsol dir\n"); plotMeritToZsol(z,phi); printf("end\n"); gets(&c);*/ /* norm = cblas_dnrm2(n,sdir_descent,1); printf("norm desc %e \n",norm); cblas_dscal( n , 1/norm , sdir_descent, 1); */ /* descentCondition = jacobian_psi.dk */ descentCondition = cblas_ddot(n, sgrad_psi_z, 1, sdir_descent, 1); /* Criterion to be satisfied: error < -rho*norm(dk)^p */ criterion = -rho * pow(criterion, p); /* printf("ddddddd %d\n",scmp); if (scmp>100){ displayMat(sjacobianPhi_z,n,n,n); exit(1); }*/ // if ((infoDGESV != 0 || descentCondition > criterion) && 0) // { // printf("no a desc dir, get grad psy\n"); /* dk = - jacobian_psi (remind that dk is saved in phi_z) */ // cblas_dcopy(n, sgrad_psi_z, 1, sdir_descent, 1); // cblas_dscal(n , -1.0 , sdir_descent, incx); /*DEBUG ONLY*/ /*printf("begin plot new descent dir\n"); plotMerit(z); printf("end\n"); gets(&c);*/ // } /* coef=fabs(norm_jacobian_psi_z*norm_jacobian_psi_z/descentCondition); if (coef <1){ cblas_dscal(n,coef,sdir_descent,incx); printf("coef %e norm dir descent is now %e\n",coef,cblas_dnrm2(n,sdir_descent,1)); }*/ /* Step-3 Line search: computes z_k+1 */ /*linesearch_Armijo(n,z,sdir_descent,psi_z, descentCondition, phi);*/ /* if (niter == 10){ printf("begin plot new descent dir\n"); plotMerit(z); printf("end\n"); gets(&c); }*/ /* memcpy(oldz,z,n*sizeof(double));*/ resls = linesearch2_Armijo(n, z, psi_z, descentCondition); if (!resls && niter > 1) { /* displayMat(sjacobianPhi_z,n,n,n); printf("begin plot new descent dir\n"); plotMerit(oldz,psi_z, descentCondition); printf("end\n"); gets(&c);*/ } /* lineSearch_Wolfe(z, descentCondition, phi,jacobianPhi);*/ /* if (niter>3){ printf("angle between prev dir %e.\n",acos(cblas_ddot(n, sdir_descent, 1, sPrevDirDescent, 1)/(cblas_dnrm2(n,sdir_descent,1)*cblas_dnrm2(n,sPrevDirDescent,1)))); }*/ cblas_dcopy(n, sdir_descent, 1, sPrevDirDescent, 1); /* for (j=20;j<32;j++){ if (z[j]<0) z[j]=0; }*/ /* if( 1 || verbose>0) { printf("Non Smooth Newton, iteration number %i, error grad equal to %14.7e , psi value is %14.7e .\n",niter, terminationCriterion,psi_z); printf(" -----------------------------------------------------------------------\n"); }*/ } /* Total number of iterations */ iparam[1] = niter; /* Final error */ dparam[1] = terminationCriterion; /** Free memory*/ if (verbose > 0) { if (dparam[1] > tolerance) printf("Non Smooth Newton warning: no convergence after %i iterations\n" , niter); else printf("Non Smooth Newton: convergence after %i iterations\n" , niter); printf(" The residue is : %e \n", dparam[1]); } /* free(oldz);*/ if (dparam[1] > tolerance) return 1; else return 0; }
int lineSearch_Wolfe(double *z, double qp_0) { int incx = 1, incy = 1; double q_0, q_tk, qp_tk; double tmin = 1e-12; int maxiter = 100; int niter = 0; double tk = 1; double tg, td; double m1 = 0.1; double m2 = 0.9; (*sFphi)(sN, z, sphi_z, 0); q_0 = cblas_dnrm2(sN, sphi_z , incx); q_0 = 0.5 * q_0 * q_0; tg = 0; td = 10e5; tk = (tg + td) / 2.0; while (niter < maxiter || (td - tg) < tmin) { niter++; /*q_tk = 0.5*|| phi(z+tk*d) ||*/ cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(sN , tk , sdir_descent , incx , sz2 , incy); (*sFphi)(sN, sz2, sphi_z, 0); q_tk = cblas_dnrm2(sN, sphi_z , incx); q_tk = 0.5 * q_tk * q_tk; (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_z, incx); qp_tk = cblas_ddot(sN, sgrad_psi_z, 1, sdir_descent, 1); if (qp_tk < m2 * qp_0 && q_tk < q_0 + m1 * tk * qp_0) { /*too small*/ if (niter == 1) break; tg = tk; tk = (tg + td) / 2.0; continue; } else if (q_tk > q_0 + m1 * tk * qp_0) { /*too big*/ td = tk; tk = (tg + td) / 2.0; continue; } else break; } cblas_dcopy(sN, sz2, incx, z, incx); if ((td - tg) <= tmin) { printf("NonSmoothNewton2::lineSearchWolfe warning, resulting tk < tmin, linesearch stopped.\n"); return 0; } return 1; }
/* Linesearch */ int linesearch2_Armijo(int n, double *z, double psi_k, double descentCondition) { /* IN : psi_k (merit function for current iteration) jacobian_psi_k (jacobian of the merit function) dk: descent direction OUT: tk, z */ double m1 = 0.1; double tk = 1; double tkl, tkr, tkaux; int incx = 1, incy = 1; double merit, merit_k; double tmin = 1e-14; double qp_tk; /* cblas_dcopy(sN, z, incx,sz2,incx);*/ /* z1 = z0 + dir */ /* cblas_daxpy(n , 1.0 , sdir_descent , incx , z , incy );*/ tk = 3.25; while (tk > tmin) { /* Computes merit function = 1/2*norm(phi(z_{k+1}))^2 */ cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(n , tk , sdir_descent , incx , sz2 , incy); (*sFphi)(n, sz2, sphi_z, 0); merit = cblas_dnrm2(n, sphi_z , incx); merit = 0.5 * merit * merit; merit_k = psi_k + m1 * tk * descentCondition; if (merit < merit_k) { tkl = 0; tkr = tk; /*calcul merit'(tk)*/ (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx); qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1); if (qp_tk > 0) { while (fabs(tkl - tkr) > tmin) { tkaux = 0.5 * (tkl + tkr); cblas_dcopy(sN, z, incx, sz2, incx); cblas_daxpy(n , tkaux , sdir_descent , incx , sz2 , incy); /*calcul merit'(tk)*/ (*sFphi)(n, sz2, sphi_z, 0); (*sFjacobianPhi)(sN, sz2, sjacobianPhi_z, 1); /* Computes the jacobian of the merit function, jacobian_psi = transpose(jacobianPhiMatrix).phiVector */ cblas_dgemv(CblasColMajor,CblasTrans, sN, sN, 1.0, sjacobianPhi_z, sN, sphi_z, incx, 0.0, sgrad_psi_zaux, incx); qp_tk = cblas_ddot(sN, sgrad_psi_zaux, 1, sdir_descent, 1); if (qp_tk > 0) { tkr = tkaux; } else { tkl = tkaux; } } } /* printf("merit = %e, merit_k=%e,tk= %e,tkaux=%e \n",merit,merit_k,tk,tkaux);*/ cblas_dcopy(sN, sz2, incx, z, incx); break; } tk = tk * 0.5; } if (tk <= tmin) { cblas_dcopy(sN, sz2, incx, z, incx); printf("NonSmoothNewton::linesearch2_Armijo warning, resulting tk=%e < tmin, linesearch stopped.\n", tk); return 0; } return 1; }
void caffe_cpu_gemv<double>(const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double* A, const double* x, const double beta, double* y) { cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); }
/* * (input) double *z : size n+m * (output)double *w : size n+m * * */ int mlcp_compute_error(MixedLinearComplementarityProblem* problem, double *z, double *w, double tolerance, double * error) { /* Checks inputs */ if (problem == NULL || z == NULL || w == NULL) numerics_error("mlcp_compute_error", "null input for problem and/or z and/or w"); int param = 1; int NbLines = problem->M->size0; /* Equalities */ int n = problem->n; /* Equalities */ int m = problem->m; /* Inequalities */ int incx = 1, incy = 1; /* Computation of w: depends on the way the problem is written */ /* Problem in the form (M,q) */ if (problem->isStorageType1) { if (problem->M == NULL) numerics_error("mlcp_compute_error", "null input for M"); /* Computes w = Mz + q */ cblas_dcopy(NbLines , problem->q , incx , w , incy); prodNumericsMatrix(problem->M->size1, problem->M->size0, 1.0, problem->M, z, 1.0, w); } /* Problem in the form ABCD */ else //if (problem->isStorageType2) { /* Checks inputs */ if (problem->A == NULL || problem->B == NULL || problem->C == NULL || problem->D == NULL) { numerics_error("mlcp_compute_error: ", "null input for A, B, C or D"); } /* Links to problem data */ double *a = &problem->q[0]; double *b = &problem->q[NbLines - m]; double *A = problem->A; double *B = problem->B; double *C = problem->C; double *D = problem->D; /* Compute "equalities" part, we = Au + Cv + a - Must be equal to 0 */ cblas_dcopy(NbLines - m , a , incx , w , incy); // we = w[0..n-1] <-- a cblas_dgemv(CblasColMajor,CblasNoTrans , NbLines - m, n , 1.0 , A , NbLines - m , &z[0] , incx , 1.0 , w , incy); // we <-- A*u + we cblas_dgemv(CblasColMajor,CblasNoTrans , NbLines - m, m , 1.0 , C , NbLines - m , &z[n] , incx , 1.0 , w , incy); // we <-- C*v + we /* Computes part which corresponds to complementarity */ double * pwi = w + NbLines - m; // No copy!! cblas_dcopy(m , b , incx , pwi , incy); // wi = w[n..m] <-- b // following int param, we recompute the product wi = Du+BV +b and we = Au+CV +a // The test is then more severe if we compute w because it checks that the linear equation is satisfied if (param == 1) { cblas_dgemv(CblasColMajor,CblasNoTrans , m, n , 1.0 , D , m , &z[0] , incx , 1.0 , pwi , incy); // wi <-- D*u+ wi cblas_dgemv(CblasColMajor,CblasNoTrans , m , m , 1.0 , B , m , &z[n] , incx , 1.0 , pwi , incy); // wi <-- B*v + wi } } /* Error on equalities part */ double error_e = 0; /* Checks complementarity (only for rows number n to size) */ double error_i = 0.; double zi, wi; double *q = problem->q; double norm_e = 1; double norm_i = 1; if (problem->blocksRows) { int numBlock = 0; while (problem->blocksRows[numBlock] < n + m) { if (!problem->blocksIsComp[numBlock]) { error_e += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], w + problem->blocksRows[numBlock] , incx); norm_e += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], q + problem->blocksRows[numBlock] , incx); } else { for (int numLine = problem->blocksRows[numBlock]; numLine < problem->blocksRows[numBlock + 1] ; numLine++) { zi = z[numLine]; wi = w[numLine]; if (zi < 0.0) { error_i += -zi; if (wi < 0.0) error_i += zi * wi; } if (wi < 0.0) error_i += -wi; if ((zi > 0.0) && (wi > 0.0)) error_i += zi * wi; } norm_i += cblas_dnrm2(problem->blocksRows[numBlock + 1] - problem->blocksRows[numBlock], w + problem->blocksRows[numBlock] , incx); } numBlock++; } } else { printf("WARNING, DEPRECATED MLCP API\n"); /* Error on equalities part */ error_e = cblas_dnrm2(NbLines - m , w , incx);; /* Checks complementarity (only for rows number n to size) */ error_i = 0.; for (int i = 0 ; i < m ; i++) { zi = z[n + i]; wi = w[(NbLines - m) + i]; if (zi < 0.0) { error_i += -zi; if (wi < 0.0) error_i += zi * wi; } if (wi < 0.0) error_i += -wi; if ((zi > 0.0) && (wi > 0.0)) error_i += zi * wi; } /* Computes error */ norm_i += cblas_dnrm2(m , q + NbLines - m , incx); norm_e += cblas_dnrm2(NbLines - m , q , incx); } if (error_i / norm_i >= error_e / norm_e) { *error = error_i / (1.0 + norm_i); } else { *error = error_e / (1.0 + norm_e); } if (*error > tolerance) { /*if (isVerbose > 0) printf(" Numerics - mlcp_compute_error failed: error = %g > tolerance = %g.\n",*error, tolerance);*/ if (verbose) printf(" Numerics - mlcp_compute_error failed: error = %g > tolerance = %g.\n", *error, tolerance); /* displayMLCP(problem);*/ return 1; } else { if (verbose > 0) printf("Siconos/Numerics: mlcp_compute_error: Error evaluation = %g \n", *error); return 0; } }
void FrictionContact2D_latin(FrictionContactProblem* problem , double *reaction , double *velocity , int *info, SolverOptions* options) { int nc = problem->numberOfContacts; assert(nc>0); double * vec = problem->M->matrix0; double *qq = problem->q; double * mu = problem->mu; int info77 = 0; int i, j, kk, iter1, ino, ddl, nrhs; int info2 = 0; int n = 2 * nc; size_t idim, nbno; int incx = 1, incy = 1; size_t taille, taillet, taillen, itt; int *ddln; int *ddlt, *vectnt; assert(n>0); double errmax, alpha, beta, maxa, k_latin; double aa, nt, wn, tc, zc0; double err1, num11, err0; double den11, den22, knz0, ktz0, *ktz, *wf; double *wc, *zc, *wt, *maxwt, *wnum1, *znum1; double *zt, *maxzt; double *kn, *kt; // char trans='T', diag='N'; // char uplo='U', notrans='N'; double *k, *DPO, *kf, *kninv; double *kinvwden1, *kzden1, *kfinv, *knz, *wtnc; /* Recup input */ itt = options->iparam[0]; errmax = options->dparam[0]; k_latin = options->dparam[2]; /* Initialize output */ options->iparam[1] = 0; options->dparam[1] = 0.0; /* Allocations */ k = (double*) malloc(n * n * sizeof(double)); DPO = (double*) malloc(n * n * sizeof(double)); kf = (double*) malloc(n * n * sizeof(double)); kfinv = (double*) malloc(n * n * sizeof(double)); kninv = (double*) malloc(nc * nc * sizeof(double)); kn = (double*) malloc(nc * nc * sizeof(double)); kt = (double*) malloc(nc * nc * sizeof(double)); kinvwden1 = (double*) malloc(n * sizeof(double)); kzden1 = (double*) malloc(n * sizeof(double)); wc = (double*) malloc(n * sizeof(double)); zc = (double*) malloc(n * sizeof(double)); znum1 = (double*) malloc(n * sizeof(double)); wnum1 = (double*) malloc(n * sizeof(double)); wt = (double*) malloc(n * sizeof(double)); maxzt = (double*) malloc(n * sizeof(double)); knz = (double*) malloc(nc * sizeof(double)); wtnc = (double*) malloc(nc * sizeof(double)); ktz = (double*) malloc(nc * sizeof(double)); wf = (double*) malloc(nc * sizeof(double)); maxwt = (double*) malloc(nc * sizeof(double)); zt = (double*) malloc(nc * sizeof(double)); vectnt = (int*) malloc(n * sizeof(int)); ddln = (int*) malloc(nc * sizeof(int)); ddlt = (int*) malloc(nc * sizeof(int)); /* Initialization */ for (i = 0; i < n * n; i++) { k[i] = 0.; kf[i] = 0.; kfinv[i] = 0.; if (i < nc * nc) { kn[i] = 0.0; kt[i] = 0.0; kninv[i] = 0.0; if (i < n) { wc[i] = 0.0; zc[i] = 0.; reaction[i] = 0.; velocity[i] = 0.; znum1[i] = 0.; wnum1[i] = 0.; wt[i] = 0.; maxzt[i] = 0.; if (i < nc) { maxwt[i] = 0.; zt[i] = 0.; knz[i] = 0.; ktz[i] = 0.; wf[i] = 0.; wtnc[i] = 0.; } } } } for (i = 0; i < n; i++) { if (fabs(vec[i * n + i]) < DBL_EPSILON) { if (verbose > 0) printf("\n Warning nul diagonal term in M matrix \n"); free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); *info = 3; return; } else { k[i + n * i] = k_latin / vec[i * n + i]; vectnt[i] = i + 1; } } for (i = 0; i < nc; i++) { ddln[i] = vectnt[2 * i]; if (i != 0) ddlt[i] = vectnt[2 * i - 1]; else ddlt[i] = 0; } for (i = 0; i < nc; i++) { kn[i + nc * i] = k[ddln[i] + n * ddln[i]]; kt[i + nc * i] = k[ddlt[i] + n * ddlt[i]]; } taillen = sizeof(ddln) / sizeof(ddln[0]); taillet = sizeof(ddlt) / sizeof(ddlt[0]); idim = 1 + taillen / taillet; taille = 0; for (i = 0; i < n; i++) taille = sizeof(qq[i]) + taille; taille = taille / sizeof(qq[0]); nbno = taille / idim; for (i = 0; i < nc; i++) { kf[ddln[i] + n * ddln[i]] = kn[i + nc * i]; kf[ddlt[i] + n * ddlt[i]] = kt[i + nc * i]; } for (i = 0; i < n; i++) { kfinv[i + n * i] = 1. / kf[i + n * i]; if (i < nc) kninv[i + nc * i] = 1. / kt[i + nc * i]; } for (i = 0; i < n; i++) for (j = 0; j < n; j++) DPO[i + n * j] = vec[j * n + i] + kfinv[i + n * j]; DPOTRF(LA_UP, n, DPO , n, &info2); if (info2 != 0) { if (verbose > 0) printf("\n Matter with Cholesky factorization \n"); free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); *info = 2; return; } /* Iteration loops */ iter1 = 0; err1 = 1.; while ((iter1 < itt) && (err1 > errmax)) { /* Linear stage (zc,wc) -> (z,w) */ alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, zc, incx, beta, wc, incy); cblas_dcopy(n, qq, incx, znum1, incy); alpha = -1.; cblas_dscal(n , alpha , znum1 , incx); alpha = 1.; cblas_daxpy(n, alpha, wc, incx, znum1, incy); nrhs = 1; DTRTRS(LA_UP, LA_TRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77); DTRTRS(LA_UP, LA_NOTRANS, LA_NONUNIT, n, nrhs, DPO, n, znum1, n, &info77); cblas_dcopy(n, znum1, incx, reaction, incy); alpha = -1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, reaction, incx, beta, wc, incy); cblas_dcopy(n, wc, incx, velocity, incy); /* Local stage (z,w)->(zc,wc) */ for (i = 0; i < n; i++) { zc[i] = 0.; wc[i] = 0.0; } /* Normal party */ for (i = 0; i < nc; i++) { knz0 = 0.; for (kk = 0; kk < nc; kk++) { knz[i] = kt[i + nc * kk] * velocity[ddlt[kk]] + knz0; knz0 = knz[i]; } zt[i] = reaction[ddlt[i]] - knz[i]; if (zt[i] > 0.0) { zc[ddlt[i]] = zt[i]; maxzt[i] = 0.0; } else { zc[ddlt[i]] = 0.0; maxzt[i] = -zt[i]; } } for (i = 0; i < nc; i++) { zc0 = 0.; ktz0 = 0.; for (j = 0; j < nc; j++) { wc[ddlt[i]] = kninv[i + nc * j] * maxzt[j] + zc0; zc0 = wc[ddlt[i]]; ktz[i] = kn[i + nc * j] * velocity[ddln[j]] + ktz0; ktz0 = ktz[i]; } wf[i] = reaction[ddln[i]] - ktz[i]; } /* Loop other nodes */ for (ino = 0; ino < nbno; ino++) { ddl = ddln[ino]; nt = fabs(wf[ino]); /* Tangential vector */ if (nt < 1.e-8) tc = 0.; else tc = wf[ino] / nt; /* Tangentiel component */ wn = zc[ddlt[ino]]; aa = nt - mu[ino] * wn; if (aa > 0.0) { maxa = aa; } else { maxa = 0.0; } wc[ddl] = (maxa / (-1 * kn[ino + nc * ino])) * tc; aa = -nt + mu[ino] * wn; if (aa > 0.0) { maxa = aa; } else { maxa = 0.0; } zc[ddl] = (mu[ino] * wn - maxa) * tc; } /* Convergence criterium */ cblas_dcopy(n, reaction, incx, znum1, incy); alpha = -1.; cblas_daxpy(n, alpha, zc, incx, znum1, incy); cblas_dcopy(n, velocity, incx, wnum1, incy); cblas_daxpy(n, alpha, wc, incx, wnum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wnum1, incx, beta, znum1, incy); num11 = 0.; alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); num11 = cblas_ddot(n, wnum1, incx, znum1, incy); cblas_dcopy(n, reaction, incx, znum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, velocity, incx, beta, znum1, incy); alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); den11 = cblas_ddot(n, wnum1, incx, znum1, incy); cblas_dcopy(n, zc, incx, znum1, incy); alpha = 1.; beta = 1.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kf, n, wc, incx, beta, znum1, incy); alpha = 1.; beta = 0.; cblas_dgemv(CblasColMajor,CblasNoTrans, n, n, alpha, kfinv, n, znum1, incx, beta, wnum1, incy); den22 = cblas_ddot(n, znum1, incx, wnum1, incy); err0 = num11 / (den11 + den22); err1 = sqrt(err0); options->iparam[1] = iter1; options->dparam[1] = err1; iter1 = iter1 + 1; } if (err1 > errmax) { if (verbose > 0) printf("No convergence after %d iterations, the residue is %g\n", iter1, err1); *info = 1; } else { if (verbose > 0) printf("Convergence after %d iterations, the residue is %g \n", iter1, err1); *info = 0; } free(k); free(DPO); free(kf); free(kfinv); free(kninv); free(kn); free(kt); free(kinvwden1); free(kzden1); free(wc); free(zc); free(znum1); free(wnum1); free(wt); free(maxzt); free(knz); free(wtnc); free(ktz); free(wf); free(maxwt); free(zt); free(vectnt); free(ddln); free(ddlt); }
int main ( ) { CBLAS_LAYOUT Layout; CBLAS_TRANSPOSE transa; double *a, *x, *y; double alpha, beta; int m, n, lda, incx, incy, i; Layout = CblasColMajor; transa = CblasNoTrans; m = 4; /* Size of Column ( the number of rows ) */ n = 4; /* Size of Row ( the number of columns ) */ lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */ incx = 1; incy = 1; alpha = 1; beta = 0; a = (double *)malloc(sizeof(double)*m*n); x = (double *)malloc(sizeof(double)*n); y = (double *)malloc(sizeof(double)*n); /* The elements of the first column */ a[0] = 1; a[1] = 2; a[2] = 3; a[3] = 4; /* The elements of the second column */ a[m] = 1; a[m+1] = 1; a[m+2] = 1; a[m+3] = 1; /* The elements of the third column */ a[m*2] = 3; a[m*2+1] = 4; a[m*2+2] = 5; a[m*2+3] = 6; /* The elements of the fourth column */ a[m*3] = 5; a[m*3+1] = 6; a[m*3+2] = 7; a[m*3+3] = 8; /* The elemetns of x and y */ x[0] = 1; x[1] = 2; x[2] = 1; x[3] = 1; y[0] = 0; y[1] = 0; y[2] = 0; y[3] = 0; cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta, y, incy ); /* Print y */ for( i = 0; i < n; i++ ) printf(" y%d = %f\n", i, y[i]); free(a); free(x); free(y); return 0; }
/* * The equalities are eliminated. * *0=(Me_1 Me_2)(Re Ri)' + Qe *Vi=(Mi_1 Mi_2)(Re Ri)' + Qi * *Re=-Me_1^{-1}(Me_2Ri+Qe) * *Vi=(Mi_2-Mi_1 Me_1^{-1} Me_2)Ri+Qi-Mi1 Me_1^{-1} Qe * */ void GMPReducedSolve(GenericMechanicalProblem* pInProblem, double *reaction , double *velocity, int * info, SolverOptions* options, NumericsOptions* numerics_options) { SparseBlockStructuredMatrix* m = pInProblem->M->matrix1; int nbRow = m->blocksize0[m->blocknumber0 - 1]; int nbCol = m->blocksize1[m->blocknumber1 - 1]; double *Me = (double *) malloc(nbRow * nbCol * sizeof(double)); double *Qe = (double *) malloc(nbRow * sizeof(double)); double *Mi = (double *) malloc(nbRow * nbCol * sizeof(double)); double *Qi = (double *) malloc(nbRow * sizeof(double)); int Me_size; int Mi_size; buildReducedGMP(pInProblem, Me, Mi, Qe, Qi, &Me_size, &Mi_size); if ((Me_size == 0 || Mi_size == 0)) { genericMechanicalProblem_GS(pInProblem, reaction, velocity, info, options, numerics_options); free(Me); free(Qe); free(Mi); free(Qi); return; } double * pseduInvMe1 = (double *)malloc(Me_size * Me_size * sizeof(double)); memcpy(pseduInvMe1, Me, Me_size * Me_size * sizeof(double)); pinv(pseduInvMe1, Me_size, Me_size, 1e-16); double *Mi2 = Mi + Mi_size * Me_size; double *Mi1 = Mi; double *Me2 = Me + Me_size * Me_size; #ifdef GMP_DEBUG_GMPREDUCED_SOLVE double *Me1 = Me; FILE * titi = fopen("buildReducedGMP_output.txt", "w"); printf("GMPReducedsolve\n"); printDenseMatrice("Me1", titi, Me1, Me_size, Me_size); printDenseMatrice("Me2", titi, Me2, Me_size, Mi_size); printDenseMatrice("Mi1", titi, Mi1, Mi_size, Me_size); printDenseMatrice("Mi2", titi, Mi2, Mi_size, Mi_size); printDenseMatrice("Qe", titi, Qe, Me_size, 1); printDenseMatrice("Qi", titi, Qi, Mi_size, 1); printDenseMatrice("Me1inv", titi, pseduInvMe1, Me_size, Me_size); #endif double * reducedProb = (double *)malloc(Mi_size * Mi_size * sizeof(double)); memcpy(reducedProb, Mi2, Mi_size * Mi_size * sizeof(double)); double * Mi1pseduInvMe1 = (double *)malloc(Mi_size * Me_size * sizeof(double)); cblas_dgemm(CblasColMajor,CblasNoTrans, CblasNoTrans, Mi_size, Me_size, Me_size, -1.0, Mi1, Mi_size, pseduInvMe1, Me_size, 0.0, Mi1pseduInvMe1, Mi_size); #ifdef GMP_DEBUG_GMPREDUCED_SOLVE printDenseMatrice("minusMi1pseduInvMe1", titi, Mi1pseduInvMe1, Mi_size, Me_size); fprintf(titi, "_minusMi1pseduInvMe1=-Mi1*Me1inv;\n"); #endif cblas_dgemv(CblasColMajor,CblasNoTrans, Mi_size, Me_size, 1.0, Mi1pseduInvMe1, Mi_size, Qe, 1, 1.0, Qi, 1); #ifdef GMP_DEBUG_GMPREDUCED_SOLVE printDenseMatrice("newQi", titi, Qi, Mi_size, 1); fprintf(titi, "_newQi=Qi+_minusMi1pseduInvMe1*Qe;\n"); #endif cblas_dgemm(CblasColMajor,CblasNoTrans, CblasNoTrans, Mi_size, Mi_size, Me_size, 1.0, Mi1pseduInvMe1, Mi_size, Me2, Me_size, 1.0, reducedProb, Mi_size); #ifdef GMP_DEBUG_GMPREDUCED_SOLVE printDenseMatrice("W", titi, reducedProb, Mi_size, Mi_size); fprintf(titi, "_W=Mi2+_minusMi1pseduInvMe1*Me2;\n"); #endif listNumericsProblem * curProblem = 0; GenericMechanicalProblem * _pnumerics_GMP = buildEmptyGenericMechanicalProblem(); curProblem = pInProblem->firstListElem; while (curProblem) { switch (curProblem->type) { case SICONOS_NUMERICS_PROBLEM_EQUALITY: { break; } case SICONOS_NUMERICS_PROBLEM_LCP: { addProblem(_pnumerics_GMP, curProblem->type, curProblem->size); break; } case SICONOS_NUMERICS_PROBLEM_FC3D: { FrictionContactProblem* pFC3D = (FrictionContactProblem*)addProblem(_pnumerics_GMP, curProblem->type, curProblem->size); *(pFC3D->mu) = *(((FrictionContactProblem*)curProblem->problem)->mu); break; } default: printf("GMPReduced buildReducedGMP: problemType unknown: %d . \n", curProblem->type); } curProblem = curProblem->nextProblem; } NumericsMatrix numM; numM.storageType = 0; numM.matrix0 = reducedProb; numM.matrix1 = 0; numM.size0 = Mi_size; numM.size1 = Mi_size; _pnumerics_GMP->M = &numM; _pnumerics_GMP->q = Qi; double *Rreduced = (double *) malloc(Mi_size * sizeof(double)); double *Vreduced = (double *) malloc(Mi_size * sizeof(double)); genericMechanicalProblem_GS(_pnumerics_GMP, Rreduced, Vreduced, info, options, numerics_options); #ifdef GMP_DEBUG_GMPREDUCED_SOLVE if (*info) { printf("\nGMPREduced failed!\n"); } else { printf("\nGMPREduced succed!\n"); printDenseMatrice("Ri", titi, Rreduced, Mi_size, 1); printDenseMatrice("Vi", titi, Vreduced, Mi_size, 1); } #endif if (!*info) { /*Re computation*/ double * Re = (double*)malloc(Me_size * sizeof(double)); double * Rbuf = (double*)malloc(Me_size * sizeof(double)); memcpy(Rbuf, Qe, Me_size * sizeof(double)); cblas_dgemv(CblasColMajor,CblasNoTrans, Me_size, Mi_size, 1.0, Me2, Me_size, Rreduced, 1, 1.0, Rbuf, 1); cblas_dgemv(CblasColMajor,CblasNoTrans, Me_size, Me_size, -1.0, pseduInvMe1, Me_size, Rbuf, 1, 0.0, Re, 1); #ifdef GMP_DEBUG_GMPREDUCED_SOLVE fprintf(titi, "_Re=-Me1inv*(Me2*Ri+Qe);\n"); printDenseMatrice("Re", titi, Re, Me_size, 1); #endif GMPReducedSolToSol(pInProblem, reaction, velocity, Re, Rreduced, Vreduced); double err; int tolViolate = GenericMechanical_compute_error(pInProblem, reaction, velocity, options->dparam[0], options, &err); if (tolViolate) { printf("GMPReduced, warnning, reduced problem solved, but error of intial probleme violated tol = %e, err= %e\n", options->dparam[0], err); } free(Re); free(Rbuf); } #ifdef GMP_DEBUG_GMPREDUCED_SOLVE fclose(titi); #endif free(Rreduced); free(Vreduced); freeGenericMechanicalProblem(_pnumerics_GMP, NUMERICS_GMP_FREE_GMP); free(Me); free(Mi); free(Qe); free(Qi); free(pseduInvMe1); free(reducedProb); free(Mi1pseduInvMe1); // GenericMechanicalProblem GMPOutProblem; // SparseBlockStructuredMatrix mOut; }
int test_prodNumericsMatrix(NumericsMatrix** MM) { NumericsMatrix* M1 = MM[0]; NumericsMatrix* M2 = MM[1]; NumericsMatrix* M3 = MM[2]; NumericsMatrix* M4 = MM[3]; printf("== Numerics tests: prodNumericsMatrix(NumericsMatrix,vector) == \n"); int i , n = M1->size1, m = 4; double * x = (double *)malloc(n * sizeof(double)); double * x2 = (double *)malloc(m * sizeof(double)); double alpha = 2.3, beta = 1.9; double * yref = (double *)malloc(n * sizeof(double)); double * yref2 = (double *)malloc(n * sizeof(double));; double * y = (double *)malloc(n * sizeof(double)); double * y2 = (double *)malloc(n * sizeof(double)); for (i = 0; i < n; i++) { x[i] = i + 1.0; yref[i] = 0.1 * i; yref2[i] = 0.1 * i; y[i] = yref[i]; y2[i] = yref2[i]; } x2[0] = 0; x2[1] = 0; x2[2] = 0; x2[3] = 0; int incx = 1, incy = 1; cblas_dgemv(CblasColMajor, CblasNoTrans, n, n, alpha, M1->matrix0, n, x, incx, beta, yref, incy); prodNumericsMatrix(n, n, alpha, M1, x, beta, y); double tol = 1e-12; int info = 0; for (i = 0; i < n; i++) { if (fabs(y[i] - yref[i]) > tol) info = 1; // printf("%lf\n", fabs(y[i]-yref[i])); } if (info == 0) printf("Step 0 ( y = alpha*A*x + beta*y, double* storage) ok ...\n"); else printf("Step 0 ( y = alpha*A*x + beta*y, double* storage) failed ...\n"); cblas_dgemv(CblasColMajor, CblasNoTrans, n, m, alpha, M3->matrix0, n, x2, incx, beta, yref2, incy); prodNumericsMatrix(m, n, alpha, M3, x2, beta, y2); for (i = 0; i < n; i++) { if (fabs(y2[i] - yref2[i]) > tol) info = 1; /* printf("%lf\n", fabs(y2[i]-yref2[i])); */ /* printf("%lf\n",y2[i]); */ /* printf("%lf\n",yref2[i]); */ } if (info == 0) printf("Step 1 ( y = alpha*A*x + beta*y, double* storage, non square) ok ...\n"); else printf("Step 1 ( y = alpha*A*x + beta*y, double* storage, non square) failed ...\n"); /* Sparse ... */ for (i = 0; i < n; i++) { y[i] = 0.1 * i; y2[i] = 0.1 * i; } prodNumericsMatrix(n, n, alpha, M2, x, beta, y); for (i = 0; i < n; i++) { if (fabs(y[i] - yref[i]) > tol) info = 1; /* printf("%lf\n", fabs(y[i]-yref[i])); */ /* printf("%lf\n", y[i]); */ } if (info == 0) printf("Step 2 ( y = alpha*A*x + beta*y, sparse storage) ok ...\n"); else printf("Step 2 ( y = alpha*A*x + beta*y, sparsestorage) failed ...\n"); prodNumericsMatrix(m, n, alpha, M4, x2, beta, y2); for (i = 0; i < n; i++) { if (fabs(y2[i] - yref2[i]) > tol) info = 1; /* printf("%lf\n", fabs(y2[i]-yref2[i])); */ /* printf("%lf\n",y2[i]); */ /* printf("%lf\n",yref2[i]); */ } if (info == 0) printf("Step 3 ( y = alpha*A*x + beta*y, sparse storage, non square) ok ...\n"); else printf("Step 3 ( y = alpha*A*x + beta*y, sparsestorage, non square) failed ...\n"); free(x); free(x2); free(y); free(y2); free(yref); free(yref2); printf("== End of test prodNumericsMatrix(NumericsMatrix,vector), result = %d\n", info); return info; }