/*
 * Preconditioned conjugate-gradient iteration using the operator applied by
 * matVec(A, stgs, pr, in, out) and the diagonal preconditioner stored in pr->M.
 *
 *   s        optional warm start; when NULL the iteration starts from zero.
 *   b        on entry the right-hand side; overwritten with the iterate.
 *   max_its  upper bound on the number of CG steps.
 *   tol      stop as soon as the residual norm drops below this value.
 *
 * Returns 0 when the starting residual is already below MIN(tol, 1e-18),
 * the 1-based step count at which the residual fell below tol, or the final
 * loop counter when the iteration limit was exhausted without converging.
 *
 * Work vectors (p, Gp, r, z) live in pr and are overwritten.
 */
static scs_int pcg(const AMatrix * A, const Settings * stgs, Priv * pr, const scs_float * s, scs_float * b, scs_int max_its, scs_float tol) {
    const scs_int len = A->n;
    const size_t nbytes = len * sizeof(scs_float);
    scs_float *dir = pr->p;      /* search direction */
    scs_float *Gdir = pr->Gp;    /* operator applied to the search direction */
    scs_float *resid = pr->r;    /* current residual */
    scs_float *precond = pr->z;  /* preconditioned residual */
    scs_float *Minv = pr->M;     /* inverse diagonal preconditioner */
    scs_float rz, rzPrev, step;
    scs_int iter;

    if (s != NULL) {
        /* warm start: build the residual from matVec(s) and b
         * (presumably b - G*s, given the sign flips below), then begin at s */
        matVec(A, stgs, pr, s, resid);
        addScaledArray(resid, b, len, -1);
        scaleArray(resid, -1, len);
        memcpy(b, s, nbytes);
    } else {
        /* cold start: the residual is the right-hand side, the iterate is zero */
        memcpy(resid, b, nbytes);
        memset(b, 0, nbytes);
    }

    /* nothing to do when the starting residual is already negligible */
    if (calcNorm(resid, len) < MIN(tol, 1e-18)) {
        return 0;
    }

    applyPreConditioner(Minv, precond, resid, len, &rz);
    memcpy(dir, precond, nbytes);

    for (iter = 0; iter < max_its; ++iter) {
        matVec(A, stgs, pr, dir, Gdir);
        step = rz / innerProd(dir, Gdir, len);

        /* advance the iterate along dir and update the residual accordingly */
        addScaledArray(b, dir, len, step);
        addScaledArray(resid, Gdir, len, -step);

        if (calcNorm(resid, len) < tol) {
#if EXTRAVERBOSE > 0
            scs_printf("tol: %.4e, resid: %.4e, iters: %li\n", tol, calcNorm(resid, len), (long) iter+1);
#endif
            return iter + 1;
        }

        /* new direction: preconditioned residual plus the scaled old direction */
        rzPrev = rz;
        applyPreConditioner(Minv, precond, resid, len, &rz);
        scaleArray(dir, rz / rzPrev, len);
        addScaledArray(dir, precond, len, 1);
    }

    return iter;
}
/// Returns a freshly allocated copy of the current solution vector after it
/// has been passed through g ().multMat with matVec ().cc ().cc_sqrt ().
///
/// The first temporary vector (tmpVec1) is used as scratch space and is
/// overwritten. The log and prof arguments are not used.
template <class F>
boost::shared_ptr<std::vector<std::complex<F> > >
CpuIterativeSolver<F>::getResult (UNUSED std::ostream& log, UNUSED Core::ProfilingDataPtr prof) {
  std::vector<ctype>& scaled = tmpVec1 ();
  std::vector<ctype>& current = this->xvec ();

  // scaled = multMat (cc_sqrt, current)
  g ().multMat (matVec ().cc ().cc_sqrt (), current, scaled);

  // Copy out of the scratch buffer so the caller owns an independent vector.
  return boost::make_shared<std::vector<std::complex<F> > > (scaled);
}
//----------------------------------------------------------------------- double powerMethod(int rank) { MPI_Barrier(MPI_COMM_WORLD); double xNorm = 0; int iteration = 0; for (iteration = 0; iteration < NUM_ITERATIONS; iteration++) { if (MASTER(rank)) { xNorm = norm(); //printf("At iteration %d, the norm of x is %f\n", iteration, xNorm); int index = 0; for (index = 0; index < n; index++) { x[index] = x[index] / xNorm; //printf("x[%d] = %f\n", index, x[index]); } } MPI_Barrier(MPI_COMM_WORLD); matVec(rank); MPI_Barrier(MPI_COMM_WORLD); } MPI_Barrier(MPI_COMM_WORLD); return xNorm; }
/// Prepares the solver state for an iterative solve and returns the norm of
/// the initial residual.
///
/// @param einc   incident-field vector; run through g ().multMat with cc_sqrt
///               to form the right-hand side held in the first temporary
///               vector.
/// @param log    stream receiving progress messages.
/// @param start  optional start value; when non-empty it is divided by cc_sqrt
///               (multMatInv) and used as the initial iterate.
/// @param prof   unused.
/// @return LinAlg::norm of the chosen initial residual.
///
/// Side effects: sets residScale to 1 / norm(right-hand side) and overwrites
/// xvec, rvec, Avecbuffer and the first temporary vector.
template <class F>
F CpuIterativeSolver<F>::initGeneral (const std::vector<ctype>& einc, std::ostream& log, const std::vector<ctype>& start, UNUSED Core::ProfilingDataPtr prof) {
  std::vector<ctype>& rhs = tmpVec1 ();

  // Clear the entries between nvCount and vecStride in each of the three
  // component slices before the right-hand side is written.
  for (int comp = 0; comp < 3; ++comp) {
    for (uint32_t idx = g ().nvCount (); idx < g ().vecStride (); ++idx) {
      rhs[idx + comp * g ().vecStride ()] = 0;
    }
  }

  g ().multMat (matVec ().cc ().cc_sqrt (), einc, rhs);
  ftype rhsNorm = LinAlg::norm (rhs);
  this->residScale = 1 / rhsNorm;

  // NaN placeholder; both branches below assign the real value.
  ftype residNorm = 0.0 / 0.0;

  if (start.empty ()) {
    // No start value given: try the right-hand side itself as the guess.
    std::vector<ctype>& applied = this->Avecbuffer ();
    matVec ().apply (rhs, applied, false);

    std::vector<ctype>& resid = this->rvec ();
    LinAlg::linComb (applied, ctype (-1), rhs, resid);
    residNorm = LinAlg::norm (resid);
    log << "temp = " << rhsNorm << ", inprodR = " << residNorm << std::endl;

    std::vector<ctype>& guess = this->xvec ();
    if (rhsNorm < residNorm) {
      // The zero vector gives the smaller residual: the residual is then the
      // right-hand side itself.
      log << "Use 0" << std::endl;
      LinAlg::fill<ctype> (guess, 0);
      swap (resid, rhs);
      residNorm = rhsNorm;
    } else {
      log << "Use pvec" << std::endl;
      swap (guess, rhs);
    }
  } else {
    // Caller supplied a start value: guess = start / cc_sqrt
    std::vector<ctype>& guess = this->xvec ();
    g ().multMatInv (matVec ().cc ().cc_sqrt (), start, guess);

    std::vector<ctype>& applied = this->Avecbuffer ();
    matVec ().apply (guess, applied, false);

    std::vector<ctype>& resid = this->rvec ();
    LinAlg::linComb (applied, ctype (-1), rhs, resid);
    residNorm = LinAlg::norm (resid);
    log << "Use loaded start value" << std::endl;
  }

  // NOTE(review): the value logged under this label is the right-hand-side
  // norm, not the returned residual norm - confirm that is intended.
  log << "|r_0|^2: " << rhsNorm << std::endl;
  return residNorm;
}
// Subroutine for the power method, to return the spectral radius double powerMethod(double * mat, double * x, int size, int iter) { int i, j; int nprocs, myrank; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); MPI_Comm_size(MPI_COMM_WORLD, &nprocs); double * local_x = (double *)malloc(size/nprocs*sizeof(double)); for (i = 0; i < iter; ++i) { //printf("initializing iteration %d\n", i); double normVal = norm2(x, size); //printf("norm2 gives value %f on iteration%d\n", normVal, i); for (j = 0; j < size; ++j) { x[j] = x[j] / normVal; } if(myrank == 0) { //fprintf(stdout, "iteration %d: performing scatter\n", i); } MPI_Bcast(x, size, MPI_DOUBLE, 0, MPI_COMM_WORLD); /* if(myrank == 0) fprintf(stdout, "Printing all vectors after broadcast:\n"); printf("Process %d -----\n", myrank); printBuf(x, size); printf("\n------------------\n"); */ matVec(mat, x, local_x, size/nprocs, size); /* MPI_Barrier(MPI_COMM_WORLD); if(myrank == 0) fprintf(stdout, "Printing all vectors after multiplication:\n"); printf("Process %d x -----\n", myrank); printBuf(x, size); printf("Process %d local_x ---\n", myrank); printBuf(local_x, size/nprocs); printf("\n-----------------\n"); */ if(myrank == 0) { //fprintf(stdout, "process %d gives the following vector on iteration %d:\n",myrank, i); } for (j = 0; j < size/nprocs; ++j) { //printf("%f\n", local_x[j]); //x[j] = local_x[j]; } if(myrank == 0) { //fprintf(stdout, "iteration %d: performing gather\n",i); } MPI_Gather(local_x, size/nprocs, MPI_DOUBLE, x, size/nprocs, MPI_DOUBLE, 0, MPI_COMM_WORLD); if(myrank == 0) { //fprintf(stdout, "iteration %d: vector gathered from all nodes:\n", i); for (j = 0; j < size; ++j) { //printf("%f\n", x[j]); } } MPI_Barrier(MPI_COMM_WORLD); } free(local_x); return norm2(x, size); }
/*
 * Power method: repeatedly normalises x and multiplies it by mat, returning
 * the 2-norm of the final (un-normalised) iterate.
 *
 *   mat   matrix data handed to matVec unchanged
 *   x     iterate of length size; overwritten
 *   size  length of x (also passed to matVec as both dimensions)
 *   iter  number of iterations
 *
 * The norm is evaluated once per iteration, before any element is scaled.
 * Recomputing it inside the element loop would divide later entries by the
 * norm of a partially scaled vector and would cost O(size^2) per iteration.
 */
double powerMethod(double * mat, double * x, int size, int iter) {
    int i, j;

    for (i = 0; i < iter; i++) {
        const double normVal = norm2(x, size);
        for (j = 0; j < size; j++) {
            x[j] = x[j] / normVal;
        }

        /* NOTE(review): x is passed as both the 2nd and the 5th argument.
         * If matVec writes its result while still reading its input, this
         * aliasing corrupts the product - confirm matVec tolerates it. */
        matVec(mat, x, size, size, x);
    }

    return norm2(x, size);
}
/*
 * ARPACK driver: asks dnaupd_/dneupd_ for the nev = 9 largest-magnitude
 * ("LM") eigenvalues of the N x N operator applied by matVec, prints each
 * real part, and checks that the i-th one equals N - i to within 1e-6.
 *
 * Returns 0 when all eigenvalues match, EXIT_FAILURE on the first mismatch,
 * on an ARPACK error (info < 0) or when memory cannot be allocated.
 */
int main() {
  int ido = 0;
  char bmat[] = "I";
  int N = 1000;
  char which[] = "LM";
  int nev = 9;
  double tol = 0;
  double resid[N];
  int ncv = 2*nev+1;
  double V[ncv*N];
  int ldv = N;
  int iparam[11];
  int ipntr[14];
  double workd[3*N];
  int rvec = 1;
  char howmny[] = "A";
  int select[3*ncv];
  double z[(N+1)*(nev+1)];
  int ldz = N+1;
  double sigmar = 0;
  double sigmai = 0;
  double workev[3*ncv];
  int lworkl = 3*(ncv*ncv) + 6*ncv;
  double workl[lworkl];
  int info = 0;
  int status = 0;
  int i, k;

  double* dr = (double*) malloc((nev+1)*sizeof(double));
  double* di = (double*) malloc((nev+1)*sizeof(double));
  if (dr == NULL || di == NULL) {
    fprintf(stderr, "out of memory\n");
    free(dr);
    free(di);
    return EXIT_FAILURE;
  }

  /* Start from fully defined work and parameter arrays so nothing
   * indeterminate is handed to the Fortran routines. */
  for (k = 0; k < 3*N; ++k) workd[k] = 0;
  for (k = 0; k < lworkl; ++k) workl[k] = 0;
  for (k = 0; k < 11; ++k) iparam[k] = 0;
  for (k = 0; k < 14; ++k) ipntr[k] = 0;

  iparam[0] = 1;
  iparam[2] = 10*N;
  iparam[3] = 1;
  iparam[6] = 1;

  /* Reverse-communication loop: ARPACK requests y = OP*x with ido == 1, and
   * with ido == -1 during its initialisation phase; both are served the same
   * way. ipntr holds 1-based offsets into workd. */
  dnaupd_(&ido, bmat, &N, which, &nev, &tol, resid, &ncv, V, &ldv, iparam, ipntr, workd, workl, &lworkl, &info);
  while (ido == 1 || ido == -1) {
    matVec(&(workd[ipntr[0]-1]), &(workd[ipntr[1]-1]));
    dnaupd_(&ido, bmat, &N, which, &nev, &tol, resid, &ncv, V, &ldv, iparam, ipntr, workd, workl, &lworkl, &info);
  }

  if (info < 0) {
    fprintf(stderr, "dnaupd failed, info = %d\n", info);
    status = EXIT_FAILURE;
  }

  if (status == 0) {
    dneupd_( &rvec, howmny, select, dr, di, z, &ldz, &sigmar, &sigmai, workev, bmat, &N, which, &nev, &tol, resid, &ncv, V, &ldv, iparam, ipntr, workd, workl, &lworkl, &info);
    if (info < 0) {
      fprintf(stderr, "dneupd failed, info = %d\n", info);
      status = EXIT_FAILURE;
    }
  }

  if (status == 0) {
    for (i = 0; i < nev; ++i) {
      printf("%f\n", dr[i]);
      /* expected spectrum: N, N-1, ..., N-nev+1 */
      if (fabs(dr[i] - (double)(N-i)) > 1e-6) {
        status = EXIT_FAILURE;
        break;
      }
    }
  }

  free(dr);
  free(di);
  return status;
}
/// Forwards a new set of couple constants to the underlying matrix-vector
/// product object; the solver keeps no copy of its own here.
///
/// @param cc  shared pointer to the couple constants to install.
template <class F>
void CpuIterativeSolver<F>::setCoupleConstants (
    const boost::shared_ptr<const CoupleConstants<ftype> >& cc) {
  matVec ().setCoupleConstants (cc);
}