// Convenience overload: takes the element count and the raw data pointer from
// the container and forwards them, with a stride of 1, to the
// (n, x, incx) overload of nrm2.
TYPE nrm2(
    const ArrayListV<TYPE>& ax  //[in] container whose elements are passed on
    ) {
  const int count = ax.size();
  const TYPE* data = ax.getPointer();
  const int unitStride = 1;
  return nrm2(count, data, unitStride);
}
// p-norm of a strided vector: (sum_k |x[k*incx]|^p)^(1/p) over n elements.
//
//   n    : number of elements to take from x
//   x    : pointer to the first element
//   p    : exponent of the norm; p == 1 is delegated to asum and p == 2 to nrm2
//   incx : stride between consecutive elements (must be positive)
//
// Returns 0 when n <= 0 or incx <= 0. For n == 1 the result is |x[0]| directly,
// which avoids the round trip through pow().
VALUE nrmp(  //
    int n,           // number of elements
    const VALUE* x,  // strided input data
    VALUE2 p,        // norm exponent
    int incx         // stride between elements
    ) {
  if (p == 1.0) {
    return asum(n, x, incx);
  }
  if (p == 2.0) {
    return nrm2(n, x, incx);
  }
  VALUE zero = 0.0e+0;
  VALUE norm = zero;
  if (n <= 0 || incx <= 0) {
    return norm;
  }
  if (n == 1) {
    return Abs(x[0]);
  }
  // Visit all n elements, i.e. offsets 0, incx, ..., (n-1)*incx inclusive.
  // The previous bound `i < (n - 1) * incx` stopped one element early and
  // silently dropped the last entry from the sum.
  int idx = 0;
  for (int k = 0; k < n; ++k, idx += incx) {
    if (x[idx] != zero) {
      norm += pow(Abs(x[idx]), p);
    }
  }
  norm = pow(norm, 1.0 / double(p));
  return norm;
}
// << ------------------------------------------
// Set up the transformation between a source basis "C" (k-point kC) and a
// destination basis "D" (k-point kD, supplied through basisDwrapper).
//
//   kC, basisC     : source k-point and basis
//   kD             : destination k-point
//   basisDwrapper  : wrapper providing the destination basis plus the lookup
//                    table (table, pitch, iGbox) used to locate D indices
//   nSpinor        : number of spinor components; only 1 and 2 are accepted
//   sym            : symmetry matrix (integer); asserted to preserve the metric RTR of C
//   invert         : inversion factor, asserted to be +1 or -1
//   super          : integer matrix relating the lattice vectors R of C to those of D
//
// The constructor validates the inputs with myassert, builds the index map
// from each C basis function to its D counterpart (mirrored to the GPU when
// GPU_ENABLED is defined) and initializes the spinor rotation matrix.
ColumnBundleTransform::ColumnBundleTransform(const vector3<>& kC, const Basis& basisC, const vector3<>& kD,
    const ColumnBundleTransform::BasisWrapper& basisDwrapper, int nSpinor, const matrix3<int>& sym, int invert, const matrix3<int>& super)
: kC(kC), basisC(basisC), kD(kD), basisD(basisDwrapper.basis), nSpinor(nSpinor), invert(invert)
{
    //Check k-point transformation and determine offset
    const matrix3<>& metricC = basisC.gInfo->RTR;
    myassert(nrm2(metricC - (~sym)*metricC*sym) < symmThreshold * nrm2(metricC)); //check symmetry
    myassert(abs(invert) == 1); //check inversion
    myassert(nrm2(basisC.gInfo->R * super - basisD.gInfo->R) < symmThreshold * nrm2(basisD.gInfo->R)); //check supercell
    matrix3<int> affine = sym * invert * super; //net affine transformation
    double offsetErr;
    // kC*affine must differ from kD by an integer vector; offsetErr receives the rounding error
    vector3<int> offset = round(kC * affine - kD, &offsetErr);
    myassert(offsetErr < symmThreshold);

    //Initialize index map:
    index.resize(basisC.nbasis);
    for(size_t n=0; n<basisC.nbasis; n++)
    {   const vector3<int>& iG_C = basisC.iGarr[n]; //C recip lattice coords
        vector3<int> iG_D = iG_C * affine + offset; //corresponding D recip lattice coords
        index[n] = basisDwrapper.table[dot(basisDwrapper.pitch, iG_D + basisDwrapper.iGbox)]; //use lookup table to get D index
    }
    // NOTE(review): dereferences min_element unconditionally; assumes basisC.nbasis > 0 -- confirm
    myassert(*std::min_element(index.begin(), index.end()) >= 0); //make sure all entries were found
    #ifdef GPU_ENABLED
    // Keep a device copy of the index map and prefer it over the host copy
    cudaMalloc(&indexGpu, sizeof(int)*index.size()); gpuErrorCheck();
    cudaMemcpy(indexGpu, index.data(), sizeof(int)*index.size(), cudaMemcpyHostToDevice); gpuErrorCheck();
    indexPref = indexGpu;
    #else
    indexPref = index.data();
    #endif

    //Initialize spinor transformation:
    switch(nSpinor)
    {   case 1: spinorRot = eye(1); break;
        case 2:
        {   // Rotation is obtained from the transpose of R * sym * inv(R)
            spinorRot = Symmetries::getSpinorRotation(~(basisC.gInfo->R * sym * inv(basisC.gInfo->R)));
            if(invert<0)
            {   // With inversion: multiply by [[0,1],[-1,0]] and complex-conjugate
                matrix sInvert = zeroes(2,2);
                sInvert.set(0,1, 1.);
                sInvert.set(1,0, -1.);
                spinorRot = conj(sInvert * spinorRot);
            }
            break;
        }
        default: myassert(!"Invalid value for nSpinor");
    }
}
/*! build a quaternion via vr2q from the vector v scaled to (almost) unit
    length times sin(theta/2), and the scalar cos(theta/2) */
inline dquater vt2q(const dcovec3& v, const double& theta)
{VERBOSE_REPORT;
  const double halfTheta = 0.5*theta;
  // DBL_MIN keeps the divisor non-zero when v is the zero vector
  const double divisor = nrm2(v)+DBL_MIN;
  return vr2q( v/divisor*std::sin(halfTheta), std::cos(halfTheta) );
}
/*! inverse: conjugate of q divided by the square of nrm2(q) */
inline dquater inv(const dquater& q)
{VERBOSE_REPORT;
  const double normSquared = pow(nrm2(q),2);
  return conj(q)/normSquared;
}
/***************************************
 *         Conjugate Gradient          *
 *   This function will do the CG      *
 *  algorithm without preconditioning. *
 *    For optimization you must not    *
 *        change the algorithm.        *
 ***************************************
 r(0)    = b - Ax(0)
 p(0)    = r(0)
 rho(0)    =  <r(0),r(0)>
 ***************************************
 for k=0,1,2,...,n-1
   q(k)      = A * p(k)
   dot_pq    = <p(k),q(k)>
   alpha     = rho(k) / dot_pq
   x(k+1)    = x(k) + alpha*p(k)
   r(k+1)    = r(k) - alpha*q(k)
   check convergence ||r(k+1)||_2 < eps
   rho(k+1)  = <r(k+1), r(k+1)>
   beta      = rho(k+1) / rho(k)
   p(k+1)    = r(k+1) + beta*p(k)
 ***************************************

 Parameters:
   n        number of unknowns (length of b, x and the work vectors)
   nnz      forwarded unchanged to matvec and DBGMAT
   maxNNZ   forwarded to matvec; data and indices are transferred with
            n*maxNNZ entries each
   data, indices, length
            sparse matrix A as consumed by matvec (the commented-out
            reference loop addresses entry j of row i as data[j*n+i])
   b        right-hand side
   x        initial guess on entry, updated in place
   sc       solver configuration: maxIter and tolerance are read;
            residual, iter and timeMatvec are written

 NOTE(review): the malloc results are not checked, and bnrm2 = 1/||r(0)||
 divides by zero if the initial residual is exactly zero.
 ***************************************/
void cg(const int n, const int nnz, const int maxNNZ, const floatType* data, const int* indices, const int* length, const floatType* b, floatType* x, struct SolverConfig* sc){
	floatType* r, *p, *q;
	floatType alpha, beta, rho, rho_old, dot_pq, bnrm2;
	int iter;
	double timeMatvec_s;
	double timeMatvec=0;
	int i;
	floatType temp;

	/* allocate memory */
	r = (floatType*)malloc(n * sizeof(floatType));
	p = (floatType*)malloc(n * sizeof(floatType));
	q = (floatType*)malloc(n * sizeof(floatType));

	#pragma acc data copyin(data[0:n*maxNNZ], indices[0:n*maxNNZ], length[0:n], n, nnz, maxNNZ, b[0:n]) copy(x[0:n]) create(alpha, beta, r[0:n], p[0:n], q[0:n], i, temp) // actually also copy(x[0:n]), but error: not found on device???
	{
	DBGMAT("Start matrix A = ", n, nnz, maxNNZ, data, indices, length)
	DBGVEC("b = ", b, n);
	DBGVEC("x = ", x, n);

	/* r(0) = b - Ax(0) */
	timeMatvec_s = getWTime();
	matvec(n, nnz, maxNNZ, data, indices, length, x, r);
	// try inlining here
	/*int i, j, k;
	#pragma acc parallel loop present(data, indices, length, x)
	for (i = 0; i < n; i++) {
		r[i] = 0;
		for (j = 0; j < length[i]; j++) {
			k = j * n + i;
			r[i] += data[k] * x[indices[k]];
		}
	}*/
	timeMatvec += getWTime() - timeMatvec_s;
	xpay(b, -1.0, n, r);
	DBGVEC("r = b - Ax = ", r, n);

	/* Calculate initial residuum */
	nrm2(r, n, &bnrm2);
	/* store the reciprocal so the per-iteration normalization is a multiply */
	bnrm2 = 1.0 /bnrm2;

	/* p(0) = r(0) */
	memcpy(p, r, n*sizeof(floatType));
	DBGVEC("p = r = ", p, n);

	/* rho(0) = <r(0),r(0)> */
	vectorDot(r, r, n, &rho);
	printf("rho_0=%e\n", rho);

	for(iter = 0; iter < sc->maxIter; iter++){
		DBGMSG("=============== Iteration %d ======================\n", iter);

		/* q(k) = A * p(k) */
		timeMatvec_s = getWTime();
		matvec(n, nnz, maxNNZ, data, indices, length, p, q);
		timeMatvec += getWTime() - timeMatvec_s;
		DBGVEC("q = A * p= ", q, n);

		/* dot_pq = <p(k),q(k)> */
		vectorDot(p, q, n, &dot_pq);
		DBGSCA("dot_pq = <p, q> = ", dot_pq);

		/* alpha = rho(k) / dot_pq */
		alpha = rho / dot_pq;
		DBGSCA("alpha = rho / dot_pq = ", alpha);

		/* x(k+1) = x(k) + alpha*p(k) */
		axpy(alpha, p, n, x);
		#pragma acc update host(x[0:n])
		DBGVEC("x = x + alpha * p= ", x, n);

		/* r(k+1) = r(k) - alpha*q(k) */
		axpy(-alpha, q, n, r);
		DBGVEC("r = r - alpha * q= ", r, n);

		rho_old = rho;
		DBGSCA("rho_old = rho = ", rho_old);

		/* rho(k+1) = <r(k+1), r(k+1)> */
		vectorDot(r, r, n, &rho);
		DBGSCA("rho = <r, r> = ", rho);

		/* Normalize the residual with initial one */
		sc->residual= sqrt(rho) * bnrm2;

		/* Check convergence ||r(k+1)||_2 < eps
		 * If the residual is smaller than the CG
		 * tolerance specified in the CG_TOLERANCE
		 * environment variable our solution vector
		 * is good enough and we can stop the
		 * algorithm.
		 */
		printf("res_%d=%e\n", iter+1, sc->residual);
		if(sc->residual <= sc->tolerance)
			break;

		/* beta = rho(k+1) / rho(k) */
		beta = rho / rho_old;
		DBGSCA("beta = rho / rho_old= ", beta);

		/* p(k+1) = r(k+1) + beta*p(k) */
		xpay(r, beta, n, p);
		DBGVEC("p = r + beta * p> = ", p, n);
	}

	/* Store the number of iterations and the
	 * time for the sparse matrix vector
	 * product which is the most expensive
	 * function in the whole CG algorithm.
	 */
	sc->iter = iter;
	sc->timeMatvec = timeMatvec;

	/* Clean up */
	free(r);
	free(p);
	free(q);
	}// end of data region
}
// Prepare a phonon calculation. In order, this routine:
//   1. parses the input twice (unit cell `e` and supercell template `eSupTemplate`)
//      and validates the supercell / k-point settings;
//   2. sets up and converges (or, in dump-only mode, just evaluates) the unit cell;
//   3. determines nBandsOpt from the fillings cutoff Fcut;
//   4. broadcasts the unit cell state to all MPI processes;
//   5. builds the supercell template (grid, replicated atoms, symmetries);
//   6. picks an orthogonal displacement basis, lists all modes and reduces
//      them to symmetry-irreducible perturbations;
//   7. computes, per spin and supercell symmetry, the rotation matrices of the
//      unit cell states (stateRot) and reports the RMS unitarity error.
//
//   printDefaults : forwarded to the first parse() call
//
// Fatal configuration problems are reported through die().
void Phonon::setup(bool printDefaults)
{
	//Parse input to initialize unit cell:
	parse(input, e, printDefaults);
	logSuspend();
	parse(input, eSupTemplate); //silently create a copy by re-parsing input (Everything is not trivially copyable)
	logResume();

	//Ensure phonon command specified:
	if(!sup.length())
		die("phonon supercell must be specified using the phonon command.\n");
	if(!e.gInfo.S.length_squared())
		die("Manual fftbox setting required for phonon. If supercell grid\n"
			"initialization fails, specify slightly larger manual fftbox.\n");
	//Check kpoint and supercell compatibility:
	if(e.eInfo.qnums.size()>1 || e.eInfo.qnums[0].k.length_squared())
		die("phonon requires a Gamma-centered uniform kpoint mesh.\n");
	for(int j=0; j<3; j++)
	{	if(!sup[j] || e.eInfo.kfold[j] % sup[j])
		{	die("kpoint folding %d is not a multiple of supercell count %d for lattice direction %d.\n",
				e.eInfo.kfold[j], sup[j], j);
		}
		eSupTemplate.eInfo.kfold[j] = e.eInfo.kfold[j] / sup[j];
	}

	logPrintf("########### Unit cell calculation #############\n");
	//Assign every atom a constraint with zero move scale and type None:
	SpeciesInfo::Constraint constraintFull;
	constraintFull.moveScale = 0;
	constraintFull.type = SpeciesInfo::Constraint::None;
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
		e.iInfo.species[sp]->constraints.assign(e.iInfo.species[sp]->atpos.size(), constraintFull);
	e.setup();
	if(!e.coulombParams.supercell) e.updateSupercell(true); //force supercell generation

	nSpins = e.eInfo.spinType==SpinZ ? 2 : 1;
	nSpinor = e.eInfo.spinorLength();

	//Initialize state of unit cell:
	if(e.cntrl.dumpOnly)
	{	//Single energy calculation so that all dependent quantities have been initialized:
		logPrintf("\n----------- Energy evaluation at fixed state -------------\n");
		logFlush();
		e.eVars.elecEnergyAndGrad(e.ener, 0, 0, true);
	}
	else elecFluidMinimize(e);
	logPrintf("# Energy components:\n");
	e.ener.print();
	logPrintf("\n");

	//Determine optimum number of bands for supercell calculation:
	nBandsOpt = 0;
	for(int q=e.eInfo.qStart; q<e.eInfo.qStop; q++)
	{	//upper_bound with std::greater: presumes fillings F[q] are sorted in descending order -- TODO confirm
		int nBands_q = std::upper_bound(e.eVars.F[q].begin(), e.eVars.F[q].end(), Fcut, std::greater<double>()) - e.eVars.F[q].begin();
		nBandsOpt = std::max(nBandsOpt, nBands_q);
	}
	mpiUtil->allReduce(nBandsOpt, MPIUtil::ReduceMax);
	logPrintf("Fcut=%lg reduced nBands from %d to %d per unit cell.\n", Fcut, e.eInfo.nBands, nBandsOpt);

	//Make unit cell state available on all processes
	//(since MPI division of qSup and q are different and independent of the map)
	for(int q=0; q<e.eInfo.nStates; q++)
	{	//Allocate:
		if(!e.eInfo.isMine(q))
		{	e.eVars.C[q].init(e.eInfo.nBands, e.basis[q].nbasis * e.eInfo.spinorLength(), &e.basis[q], &e.eInfo.qnums[q]);
			e.eVars.F[q].resize(e.eInfo.nBands);
			e.eVars.Hsub_eigs[q].resize(e.eInfo.nBands);
			if(e.eInfo.fillingsUpdate==ElecInfo::FermiFillingsAux)
				e.eVars.B[q].init(e.eInfo.nBands, e.eInfo.nBands);
		}
		//Broadcast from owner:
		int qSrc = e.eInfo.whose(q);
		e.eVars.C[q].bcast(qSrc);
		e.eVars.F[q].bcast(qSrc);
		e.eVars.Hsub_eigs[q].bcast(qSrc);
		if(e.eInfo.fillingsUpdate==ElecInfo::FermiFillingsAux)
			e.eVars.B[q].bcast(qSrc);
	}

	logPrintf("\n------- Configuring supercell and perturbation modes -------\n");

	//Grid:
	eSupTemplate.gInfo.S = Diag(sup) * e.gInfo.S; //ensure exact supercell
	eSupTemplate.gInfo.R = e.gInfo.R * Diag(sup);
	prodSup = sup[0] * sup[1] * sup[2];

	//Replicate atoms (and related properties):
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
	{	const SpeciesInfo& spIn = *(e.iInfo.species[sp]);
		SpeciesInfo& spOut = *(eSupTemplate.iInfo.species[sp]);
		spOut.atpos.clear();
		spOut.initialMagneticMoments.clear();
		matrix3<> invSup = inv(Diag(vector3<>(sup)));
		vector3<int> iR;
		//Copies are appended cell by cell, with iR[2] varying fastest:
		for(iR[0]=0; iR[0]<sup[0]; iR[0]++)
		for(iR[1]=0; iR[1]<sup[1]; iR[1]++)
		for(iR[2]=0; iR[2]<sup[2]; iR[2]++)
		{	for(vector3<> pos: spIn.atpos)
				spOut.atpos.push_back(invSup * (pos + iR));
			for(vector3<> M: spIn.initialMagneticMoments)
				spOut.initialMagneticMoments.push_back(M); //needed only to determine supercell symmetries
		}
		spOut.constraints.assign(spOut.atpos.size(), constraintFull);
	}

	//Supercell symmetries:
	eSupTemplate.symm.setup(eSupTemplate);
	const std::vector< matrix3<int> >& symSup = eSupTemplate.symm.getMatrices();
	symSupCart.clear();
	eSupTemplate.gInfo.invR = inv(eSupTemplate.gInfo.R);
	for(const matrix3<int>& m: symSup)
		symSupCart.push_back(eSupTemplate.gInfo.R * m * eSupTemplate.gInfo.invR);

	//Pick maximally symmetric orthogonal basis:
	logPrintf("\nFinding maximally-symmetric orthogonal basis for displacements:\n");
	std::vector< vector3<> > dirBasis;
	{	std::multimap<int, vector3<> > dirList; //directions indexed by their stabilizer group cardinality
		vector3<int> iR;
		for(iR[0]=0; iR[0]<=+1; iR[0]++)
		for(iR[1]=-1; iR[1]<=+1; iR[1]++)
		for(iR[2]=-1; iR[2]<=+1; iR[2]++)
			if(iR.length_squared())
			{	//Try low-order lattice vector linear combination:
				vector3<> n = eSupTemplate.gInfo.R * iR; n *= (1./n.length());
				dirList.insert(std::make_pair(nStabilizer(n, symSupCart), n));
				//Try low-order reciprocal lattice vector linear combination:
				n = iR * eSupTemplate.gInfo.invR; n *= (1./n.length());
				dirList.insert(std::make_pair(nStabilizer(n, symSupCart), n));
			}
		//multimap is ordered by key, so rbegin() is an entry with the largest stabilizer count
		dirBasis.push_back(dirList.rbegin()->second);
		//Pick second direction orthogonal to first:
		std::multimap<int, vector3<> > dirList2;
		for(auto entry: dirList)
		{	vector3<> n = entry.second;
			n -= dot(n, dirBasis[0]) * dirBasis[0];
			if(n.length_squared() < symmThresholdSq) continue;
			n *= (1./n.length());
			dirList2.insert(std::make_pair(nStabilizer(n, symSupCart), n));
		}
		dirBasis.push_back(dirList2.rbegin()->second);
		dirBasis.push_back(cross(dirBasis[0], dirBasis[1])); //third direction constrained by orthogonality
	}
	for(const vector3<>& n: dirBasis)
		logPrintf(" [ %+lf %+lf %+lf ] |Stabilizer|: %d\n", n[0], n[1], n[2], nStabilizer(n,symSupCart));

	//List all modes:
	modes.clear();
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
		for(size_t at=0; at<e.iInfo.species[sp]->atpos.size(); at++) //only need to move atoms in first unit cell
			for(int iDir=0; iDir<3; iDir++)
			{	Mode mode;
				mode.sp = sp;
				mode.at = at;
				mode.dir[iDir] = 1.;
				modes.push_back(mode);
			}

	//Find irreducible modes:
	perturbations.clear();
	for(unsigned sp=0; sp<e.iInfo.species.size(); sp++)
	{	int nAtoms = e.iInfo.species[sp]->atpos.size();
		int nPert = nAtoms * dirBasis.size();
		//generate all perturbations first:
		std::vector<Perturbation> pertSp(nPert); //perturbations of this species
		std::vector<matrix> proj(nPert); //projection operator into subspace spanned by star of current perturbation
		matrix projTot;
		const auto& atomMap = eSupTemplate.symm.getAtomMap()[sp];
		for(int iPert=0; iPert<nPert; iPert++)
		{	pertSp[iPert].sp = sp;
			pertSp[iPert].at = iPert / dirBasis.size();
			pertSp[iPert].dir = dirBasis[iPert % dirBasis.size()];
			pertSp[iPert].weight = 1./symSupCart.size();
			for(unsigned iSym=0; iSym<symSupCart.size(); iSym++)
			{	int at = atomMap[pertSp[iPert].at][iSym] % nAtoms; //map back to first cell
				vector3<> dir = symSupCart[iSym] * pertSp[iPert].dir;
				matrix nHat = zeroes(nPert,1);
				for(int iDir=0; iDir<3; iDir++)
					nHat.set(at*3+iDir,0, dir[iDir]);
				proj[iPert] += pertSp[iPert].weight * nHat * dagger(nHat);
			}
			projTot += proj[iPert];
		}
		//The projectors of all perturbations must add up to the identity:
		myassert(nrm2(projTot - eye(nPert)) < symmThreshold);
		//only select perturbations with distinct subspace projections:
		std::vector<bool> irred(nPert, true); //whether each perturbation is in irreducible set
		for(int iPert=0; iPert<nPert; iPert++)
		{	for(int jPert=0; jPert<iPert; jPert++)
				if(irred[jPert] && nrm2(proj[iPert]-proj[jPert])<symmThreshold)
				{	pertSp[jPert].weight += pertSp[iPert].weight; //send weight of current mode to its image in irreducible set
					irred[iPert] = false; //this mode will be accounted for upon symmetrization
					break;
				}
		}
		for(int iPert=0; iPert<nPert; iPert++)
			if(irred[iPert])
				perturbations.push_back(pertSp[iPert]);
	}
	logPrintf("\n%d perturbations of the unit cell reduced to %d under symmetries:\n", int(modes.size()), int(perturbations.size()));
	for(const Perturbation& pert: perturbations)
		logPrintf("%s %d [ %+lf %+lf %+lf ] %lf\n", e.iInfo.species[pert.sp]->name.c_str(),
			pert.at, pert.dir[0], pert.dir[1], pert.dir[2], pert.weight*symSupCart.size());

	//Determine wavefunction unitary rotations:
	logPrintf("\nCalculating unitary rotations of unit cell states under symmetries:\n");
	stateRot.resize(nSpins);
	double unitarityErr = 0.;
	for(int iSpin=0; iSpin<nSpins; iSpin++)
	{	//Find states involved in the supercell Gamma-point:
		struct Kpoint : public Supercell::KmeshTransform
		{	vector3<> k; //also store k-point for convenience (KmeshTransform doesn't have it)
		};
		std::vector<Kpoint> kpoints; kpoints.reserve(prodSup);
		const Supercell& supercell = *(e.coulombParams.supercell);
		for(unsigned ik=0; ik<supercell.kmesh.size(); ik++)
		{	double kSupErr; round(matrix3<>(Diag(sup)) * supercell.kmesh[ik], &kSupErr);
			if(kSupErr < symmThreshold) //maps to Gamma point
			{	Kpoint kpoint;
				(Supercell::KmeshTransform&)kpoint = supercell.kmeshTransform[ik]; //copy base class
				kpoint.k = supercell.kmesh[ik];
				kpoint.iReduced += iSpin*(e.eInfo.nStates/nSpins); //point to source k-point with appropriate spin
				kpoints.push_back(kpoint);
			}
		}
		myassert(int(kpoints.size()) == prodSup);

		//Initialize basis and qnum for these states:
		std::vector<QuantumNumber> qnums(prodSup);
		std::vector<Basis> basis(prodSup);
		logSuspend();
		for(int ik=0; ik<prodSup; ik++)
		{	qnums[ik].k = kpoints[ik].k;
			qnums[ik].spin = (nSpins==1 ? 0 : (iSpin ? +1 : -1));
			qnums[ik].weight = 1./prodSup;
			basis[ik].setup(e.gInfo, e.iInfo, e.cntrl.Ecut, kpoints[ik].k);
		}
		logResume();

		//Get wavefunctions for all these k-points:
		#define whose_ik(ik) (((ik) * mpiUtil->nProcesses())/prodSup) //local MPI division
		std::vector<ColumnBundle> C(prodSup);
		std::vector<std::shared_ptr<ColumnBundleTransform::BasisWrapper> > basisWrapper(prodSup);
		auto sym = e.symm.getMatrices(); //unit cell symmetries
		for(int ik=0; ik<prodSup; ik++)
		{	C[ik].init(e.eInfo.nBands, basis[ik].nbasis*nSpinor, &basis[ik], &qnums[ik], isGpuEnabled());
			//Only the owning process builds the wrapper and fills C[ik]; others receive it by bcast below
			if(whose_ik(ik) == mpiUtil->iProcess())
			{	int q = kpoints[ik].iReduced;
				C[ik].zero();
				basisWrapper[ik] = std::make_shared<ColumnBundleTransform::BasisWrapper>(basis[ik]);
				ColumnBundleTransform(e.eInfo.qnums[q].k, e.basis[q], qnums[ik].k, *(basisWrapper[ik]),
					nSpinor, sym[kpoints[ik].iSym], kpoints[ik].invert).scatterAxpy(1., e.eVars.C[q], C[ik],0,1);
			}
		}
		for(int ik=0; ik<prodSup; ik++) C[ik].bcast(whose_ik(ik)); //make available on all processes

		//Determine max eigenvalue:
		int nBands = e.eInfo.nBands;
		double Emax = -INFINITY;
		for(int q=e.eInfo.qStart; q<e.eInfo.qStop; q++)
			Emax = std::max(Emax, e.eVars.Hsub_eigs[q].back());
		mpiUtil->allReduce(Emax, MPIUtil::MPIUtil::ReduceMax);
		double EmaxValid = +INFINITY; //stays infinite unless some rotation is only unitary in a sub-block

		//Loop over supercell symmetry operations:
		PeriodicLookup<QuantumNumber> plook(qnums, e.gInfo.GGT);
		stateRot[iSpin].resize(symSupCart.size());
		for(size_t iSym=0; iSym<symSupCart.size(); iSym++)
		{	matrix3<> symUnitTmp = e.gInfo.invR * symSupCart[iSym] * e.gInfo.R; //in unit cell lattice coordinates
			#define SymmErrMsg \
				"Supercell symmetries do not map unit cell k-point mesh onto itself.\n" \
				"This implies that the supercell is more symmetric than the unit cell!\n" \
				"Please check to make sure that you have used the minimal unit cell.\n\n"
			//The symmetry must be an integer matrix in unit cell lattice coordinates:
			matrix3<int> symUnit;
			for(int j1=0; j1<3; j1++)
				for(int j2=0; j2<3; j2++)
				{	symUnit(j1,j2) = round(symUnitTmp(j1,j2));
					if(fabs(symUnit(j1,j2) - symUnitTmp(j1,j2)) > symmThreshold)
						die(SymmErrMsg)
				}
			//Find image kpoints under rotation: (do this for all k-points so that all processes exit together if necessary)
			std::vector<int> ikRot(prodSup);
			for(int ik=0; ik<prodSup; ik++)
			{	size_t ikRotCur = plook.find(qnums[ik].k * symUnit);
				if(ikRotCur==string::npos) die(SymmErrMsg)
				ikRot[ik] = ikRotCur;
			}
			#undef SymmErrMsg
			//Calculate unitary transformation matrix:
			stateRot[iSpin][iSym].init(prodSup, nBands);
			for(int ik=0; ik<prodSup; ik++)
				if(whose_ik(ikRot[ik]) == mpiUtil->iProcess()) //MPI division by target k-point
				{	ColumnBundle Crot = C[ikRot[ik]].similar();
					Crot.zero();
					ColumnBundleTransform(qnums[ik].k, basis[ik], qnums[ikRot[ik]].k, *(basisWrapper[ikRot[ik]]),
						nSpinor, symUnit, +1).scatterAxpy(1., C[ik], Crot,0,1);
					matrix Urot = Crot ^ O(C[ikRot[ik]]); //will be unitary if Crot is a strict unitary rotation of C[ikRot[ik]]
					//Check maximal subspace that is unitary: (remainder must be incomplete degenerate subspace)
					int nBandsValid = nBands;
					while(nBandsValid && !isUnitary(Urot(0,nBandsValid, 0,nBandsValid)))
						nBandsValid--;
					if(nBandsValid<nBands)
					{	//Update energy range of validity:
						EmaxValid = std::min(EmaxValid, e.eVars.Hsub_eigs[kpoints[ik].iReduced][nBandsValid]);
						//Make valid subspace exactly unitary:
						matrix UrotSub = Urot(0,nBandsValid, 0,nBandsValid);
						matrix UrotOverlap = dagger(UrotSub) * UrotSub;
						UrotSub = UrotSub * invsqrt(UrotOverlap); //make exactly unitary
						unitarityErr += std::pow(nrm2(UrotOverlap - eye(nBandsValid)), 2);
						//Zero out invalid subspace:
						Urot.zero();
						Urot.set(0,nBandsValid, 0,nBandsValid, UrotSub);
					}
					stateRot[iSpin][iSym].set(ik, ikRot[ik], Urot);
				}
			stateRot[iSpin][iSym].allReduce();
		}
		#undef whose_ik
		mpiUtil->allReduce(EmaxValid, MPIUtil::ReduceMin);
		if(nSpins>1) logPrintf("\tSpin %+d: ", iSpin ? +1 : -1); else logPrintf("\t");
		logPrintf("Matrix elements valid for ");
		if(std::isfinite(EmaxValid)) logPrintf("E < %+.6lf (Emax = %+.6lf) due to incomplete degenerate subspaces.\n", EmaxValid, Emax);
		else logPrintf("all available states (all degenerate subspaces are complete).\n");
	}
	//unitarityErr holds a sum of squared errors; convert to an RMS over spins, k-points and symmetries
	mpiUtil->allReduce(unitarityErr, MPIUtil::ReduceSum);
	unitarityErr = sqrt(unitarityErr / (nSpins * prodSup * symSupCart.size()));
	logPrintf("\tRMS unitarity error in valid subspaces: %le\n", unitarityErr);
}
//! Whether U * dagger(U) matches the identity of size U.nCols() to within symmThreshold (measured by nrm2)
inline bool isUnitary(const matrix& U)
{
	const double deviation = nrm2(U*dagger(U) - eye(U.nCols()));
	if(deviation < symmThreshold)
		return true;
	return false;
}
/*! solve A*x = b with restarted GMRES (restart length 10, identity preconditioner)

  \param A    system matrix
  \param b    right-hand side
  \param x    initial guess on entry, updated in place every restart cycle
  \param eps  convergence threshold applied to fabs(damax(r)), where r is the
              residual b - A*x recomputed after each restart cycle
  \return 0 when the initial residual is below DBL_MIN or the threshold is
          reached; 1 when the iteration limit int(1.1*x.l) is hit, the residual
          becomes non-finite, or it grows beyond 1e3 times its initial value

  Progress messages are written to std::cerr.
*/
int32_t gmres
(
 const CPPL::dgsmatrix& A,
 const CPPL::dcovector& b,
 CPPL::dcovector& x,
 const double& eps
 )
{
  ///////////////////////////////////////////////
  //////////////// preconditioner ///////////////
  ///////////////////////////////////////////////
  CPPL::dgbmatrix Minv(x.l, x.l, 0, 0);

  //////// no precondition ////////
  Minv.identity();

  ///////////////////////////////////////////////
  ///////////////// mid values //////////////////
  ///////////////////////////////////////////////
  long m(10);//restart number
  CPPL::dcovector r(b-A*x);
  CPPL::dcovector s(m+1), co(m+1), si(m+1), w;
  std::vector<CPPL::dcovector> v(m+1);
  CPPL::dgematrix H(m+1,m);
  //H.zero();
  //co.zero();
  //si.zero();
  //s.zero();

  //////// norm ////////
  double norm_r, norm_r_min(DBL_MAX);
  const double norm_r_ini(fabs(damax(r)));
  std::cerr << "[NOTE]@gmres: norm_r_ini=" << norm_r_ini << ", eps=" << eps<< std::endl;
  if( norm_r_ini<DBL_MIN ){
    std::cerr << "[NOTE]@gmres: already converged. v(^^)" << std::endl;
    return 0;
  }

  ///////////////////////////////////////////////
  //////////////////// loop /////////////////////
  ///////////////////////////////////////////////
  int itc(1);
  //int itmax(int(2.1*x.l));
  int itmax(int(1.1*x.l));
  //int itmax(int(0.6*x.l));
  do{
    std::cerr << "** itc=" << itc << " ********************************************" << std::endl;
    //////// 0 ////////
    v[0] =r/nrm2(r);
    s.zero();
    s(0) =nrm2(r);

    for(long i=0; i<m; i++){
      //std::cerr << "++++ i=" << i << " ++++" << std::endl;
      w =A*v[i];
      w =Minv*w;
      // orthogonalize w against the basis vectors built so far
      for(long k=0; k<i+1; k++){
        H(k,i) =w%v[k];
        w -=H(k,i)*v[k];
      }
      H(i+1,i) =nrm2(w);
      // NOTE(review): no guard against H(i+1,i)==0 here -- left as in the original
      v[i+1] =w/H(i+1,i);

      //// J,s ////
      // apply the previous rotations to the new column, then build and apply the new one
      for(long k=0; k<i; k++){
        rotate(H(k,i), H(k+1,i), co(k), si(k));
      }
      make_rotator( H(i,i), H(i+1,i), co(i), si(i) );
      //std::cerr << "co = " << t(co) << std::endl; std::cerr << "si = " << t(si) << std::endl;
      rotate( H(i,i), H(i+1,i), co(i), si(i) );//necessary
      //std::cerr << "H =\n" << H << std::endl;
      rotate( s(i), s(i+1), co(i), si(i) );
      //std::cerr << "s = " << t(s) << std::endl;
    }
    //for(long i=0; i<m+1; i++){ for(long j=i+1; j<m+1; j++){ std::cerr << "vv = " << v[i]%v[j] << std::endl; } }// v check
    //std::cerr << "H =\n" << H << std::endl;
    //std::cerr << "s =" << t(s) << std::endl;
    //for(long i=0; i<m+1; i++){ std::cerr << "v["<<i<<"] =" << t(v[i]) << std::flush; }

    //////// y ////////
    // back substitution on the upper-triangular leading m x m part of H
    CPPL::dcovector y(s);
    for(long i=m-1; i>=0; i--){
      y(i) /= H(i,i);
      for(long j=i-1; j>=0; j--){
        y(j) -= H(j,i) * y(i);
      }
    }
    //std::cerr << "H*y = " << t(H*y) << std::endl;
    //std::cerr << "s = " << t(s) << std::endl;
    //std::cerr << "y = " << t(s) << std::endl;

    //////// update ////////
    for(long i=0; i<m; i++){
      x += v[i] * y(i);
    }
    //std::cerr << "x = " << t(x) << std::endl;

    //////// residual ////////
    r =b-A*x;
    r =Minv*r;
    //std::cerr << "r = " << t(r) << std::endl;

    //////// convergence check ////////
    norm_r =fabs(damax(r));
    std::cerr << "norm_r = " << norm_r << std::endl;
    // Only NaN/Inf means the iteration broke down. The previous
    // !std::isnormal() test also fired for an exactly zero (or subnormal)
    // residual and reported a perfectly converged solve as a failure.
    if( !std::isfinite(norm_r) ){ break; }//failed
    if( norm_r>1e3*norm_r_ini ){ break; }//failed (getting so worse)
    if( norm_r<=eps ){//r satistied
      std::cerr << "[NOTE]@gmres: converged. v(^^) itc=" << itc << "/" << itmax << ", norm=" << norm_r << std::endl;
      return 0;
    }
  }while(++itc<itmax);

  //////// failed ////////
  std::cerr << "[NOTE]@gmres: itc=" << itc << ", norm=" << norm_r << ", r_satisfied=" << (norm_r<=eps) << std::endl;
  std::cerr << "[NOTE]@gmres: failed to converge. orz" << std::endl;
  return 1;
}
// Rescale x in place by the reciprocal of nrm2(x).
// NOTE(review): nothing here guards against nrm2(x) being zero.
void KPMLinalg::normalize( my::scalar* x )
{
  const my::real inverseNorm = 1./nrm2(x);
  scale(inverseNorm, x);
}
// Quaternion with the real part scaled by 1/nrm2() and the imaginary part
// negated and scaled by the same factor.
Quaternion inverse() const
{
  const double scale = 1.0 / nrm2();
  return Quaternion(_real * scale,
                    - _imaginary * scale);
}
// Square root of nrm2().
double nrm() const
{
  const double squared = nrm2();
  return std::sqrt(squared);
}
structmass1 += mass; COMVel1 += Sim.particleList[ID].getVelocity() * mass; COMPos1 += Sim.particleList[ID].getPosition() * mass; } BOOST_FOREACH(const size_t& ID, range2) { double mass = Sim.dynamics.getSpecies(Sim.particleList[ID]).getMass(ID); structmass2 += mass; COMVel2 += Sim.particleList[ID].getVelocity() * mass; COMPos2 += Sim.particleList[ID].getPosition() * mass; } COMVel1 /= structmass1; COMVel2 /= structmass2; COMPos1 /= structmass1; COMPos2 /= structmass2; rij = COMPos1 - COMPos2; vij = COMVel1 - COMVel2; Sim.dynamics.BCs().applyBC(rij, vij); rvdot = (rij | vij); r2 = rij.nrm2(); v2 = vij.nrm2(); }
/* Computes the norm of (I - A).
 *
 * mA is modified temporarily via addId(-1, mA), measured with nrm2, and then
 * put back with addId(1, mA), so no extra matrix is needed.
 * (presumably addId(c, M) adds c times the identity to M -- confirm)
 */
double minusIdNrm2(Mat mA)
{
  addId(-1, mA); // sort of a hack, but it works very well
  const double result = nrm2(mA);
  addId(1, mA);  // undo the shift before returning
  return result;
}
/*
 * Driver: parses HPDDM options, calls generate() to obtain the local problem
 * (matrices, right-hand side f, solution buffer sol, connectivity), then
 *   - with more than one MPI process: builds a HpddmSchwarz object, optionally
 *     adds a coarse operator, solves with HpddmSolve and checks the residuals;
 *   - with a single process: factorizes and solves through HpddmSubdomain and
 *     checks the residual computed with HpddmCSRMM / axpy / nrm2.
 *
 * Returns 0 on success and 1 when the iteration count or a relative residual
 * exceeds the thresholds coded below.
 */
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    /*# Init #*/
    int rankWorld, sizeWorld;
    MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld);
    MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld);
    const HpddmOption* const opt = HpddmOptionGet();
    HpddmOptionParse(opt, argc, argv, rankWorld == 0);
    {
        /* application-specific options: four integer options, then two flags */
        char* val[4] = { "Nx=<100>", "Ny=<100>", "overlap=<1>", "generate_random_rhs=<0>" };
        char* desc[4] = { "Number of grid points in the x-direction.", "Number of grid points in the y-direction.", "Number of grid points in the overlap.", "Number of generated random right-hand sides." };
        HpddmOptionParseInts(opt, argc, argv, 4, val, desc);
        val[0] = "symmetric_csr=(0|1)";
        desc[0] = "Assemble symmetric matrices.";
        val[1] = "nonuniform=(0|1)";
        desc[1] = "Use a different number of eigenpairs to compute on each subdomain.";
        HpddmOptionParseArgs(opt, argc, argv, 2, val, desc);
    }
    int sizes[8];
    int* connectivity[8];
    int o[8];
    int neighbors = 0;
    HpddmMatrixCSR* Mat, *MatNeumann = NULL;
    K* f, *sol;
    underlying_type* d;
    int ndof;
    generate(rankWorld, sizeWorld, &neighbors, o, sizes, connectivity, &ndof, &Mat, &MatNeumann, &d, &f, &sol);
    /* mu: number of right-hand sides; 0 from the option is promoted to 1 below */
    unsigned short mu = HpddmOptionApp(opt, "generate_random_rhs");
    int status = 0;
    if(sizeWorld > 1) {
        HpddmSchwarz* A = HpddmSchwarzCreate(Mat, neighbors, o, sizes, connectivity);
        for(int i = 0; i < neighbors; ++i)
            free(connectivity[i]);
        HpddmSchwarzMultiplicityScaling(A, d);
        HpddmSchwarzInitialize(A, d);
        if(mu != 0)
            HpddmSchwarzScaledExchange(A, f, mu);
        else
            mu = 1;
        if(HpddmOptionSet(opt, "schwarz_coarse_correction")) {
            double* addr = HpddmOptionAddr(opt, "geneo_nu");
            unsigned short nu = *addr;
            if(nu > 0) {
                /* "nonuniform": shift the stored geneo_nu value by a rank-dependent amount */
                if(HpddmOptionApp(opt, "nonuniform"))
                    *addr += MAX((int)(-*addr + 1), pow(-1, rankWorld) * rankWorld);
                HpddmSchwarzSolveGEVP(A, MatNeumann);
                nu = HpddmOptionVal(opt, "geneo_nu");
            }
            else {
                /* geneo_nu == 0: use a single deflation vector filled with ones */
                nu = 1;
                K** deflation = malloc(sizeof(K*));
                *deflation = malloc(sizeof(K) * ndof);
                for(int i = 0; i < ndof; ++i)
                    deflation[0][i] = 1.0;
                HpddmSetVectors(HpddmSchwarzPreconditioner(A), deflation);
            }
            HpddmInitializeCoarseOperator(HpddmSchwarzPreconditioner(A), nu);
            HpddmSchwarzBuildCoarseOperator(A, MPI_COMM_WORLD);
            /*# FactorizationEnd #*/
        }
        HpddmSchwarzCallNumfact(A);
        if(rankWorld != 0)
            HpddmOptionRemove(opt, "verbosity");
        const MPI_Comm* comm = HpddmGetCommunicator(HpddmSchwarzPreconditioner(A));
        /*# Solution #*/
        int it = HpddmSolve(A, f, sol, mu, comm);
        /*# SolutionEnd #*/
        /* two values per right-hand side: printed below as storage[2*nu+1] / storage[2*nu] */
        underlying_type* storage = malloc(sizeof(underlying_type) * 2 * mu);
        HpddmSchwarzComputeResidual(A, sol, f, storage, mu);
        if(rankWorld == 0)
            for(unsigned short nu = 0; nu < mu; ++nu) {
                if(nu == 0)
                    printf(" --- residual = ");
                else
                    printf(" ");
                printf("%e / %e", storage[1 + 2 * nu], storage[2 * nu]);
                if(mu > 1)
                    printf(" (rhs #%d)", nu + 1);
                printf("\n");
            }
        /* fail on too many iterations (limit 60 when krylov_method == 6, else 45)
           or on a ratio above 1e-2 for any right-hand side */
        if(it > ((int)HpddmOptionVal(opt, "krylov_method") == 6 ? 60 : 45))
            status = 1;
        else {
            for(unsigned short nu = 0; nu < mu; ++nu)
                if(storage[1 + 2 * nu] / storage[2 * nu] > 1.0e-2)
                    status = 1;
        }
        free(storage);
        if(HpddmOptionVal(opt, "geneo_nu") == 0)
            HpddmDestroyVectors(HpddmSchwarzPreconditioner(A));
        HpddmSchwarzDestroy(A);
    }
    else {
        HpddmSubdomain* S = NULL;
        HpddmSubdomainNumfact(&S, Mat);
        mu = MAX(1, mu);
        HpddmSubdomainSolve(S, f, sol, mu);
        int one = 1;
        /* one allocation, two halves: nrmb[0..mu) holds the norms of f, nrmAx = nrmb + mu the residual norms */
        underlying_type* nrmb = malloc(sizeof(underlying_type) * 2 * mu);
        for(unsigned short nu = 0; nu < mu; ++nu)
            nrmb[nu] = nrm2(&ndof, f + nu * ndof, &one);
        K* tmp = malloc(sizeof(K) * mu * ndof);
        HpddmCSRMM(Mat, sol, tmp, mu);
        K minus = -1;
        /* ndof is temporarily scaled so that one axpy call covers all mu right-hand sides */
        ndof *= mu;
        axpy(&ndof, &minus, f, &one, tmp, &one);
        ndof /= mu;
        underlying_type* nrmAx = nrmb + mu;
        for(unsigned short nu = 0; nu < mu; ++nu) {
            nrmAx[nu] = nrm2(&ndof, tmp + nu * ndof, &one);
            if(nu == 0)
                printf(" --- residual = ");
            else
                printf(" ");
            printf("%e / %e", nrmAx[nu], nrmb[nu]);
            if(mu > 1)
                printf(" (rhs #%d)", nu + 1);
            printf("\n");
            /* tolerance 1e-6 when underlying_type has the size of double, 1e-2 otherwise */
            if(nrmAx[nu] / nrmb[nu] > (sizeof(underlying_type) == sizeof(double) ? 1.0e-6 : 1.0e-2))
                status = 1;
        }
        free(tmp);
        free(nrmb);
        HpddmSubdomainDestroy(S);
        HpddmMatrixCSRDestroy(Mat);
    }
    free(d);
    if(HpddmOptionSet(opt, "schwarz_coarse_correction") && HpddmOptionVal(opt, "geneo_nu") > 0)
        HpddmMatrixCSRDestroy(MatNeumann);
    free(sol);
    free(f);
    MPI_Finalize();
    return status;
}
// Set up lattice minimization for the system e:
//   - remembers the current lattice vectors e.gInfo.R in Rorig;
//   - checks that latt-move-scale is equal for lattice vectors connected by a symmetry;
//   - builds strainBasis, an orthonormalized set of symmetrized strain matrices
//     that do not touch fixed directions (zero move scale or Coulomb-truncated);
//   - stores the basis size in e.latticeMinParams.nDim and sets h to 1e-5.
// Calls die() if the move scales conflict with the symmetries or if no strain
// direction is left.
LatticeMinimizer::LatticeMinimizer(Everything& e) : e(e), Rorig(e.gInfo.R)
{
	logPrintf("\n--------- Lattice Minimization ---------\n");

	//Ensure that lattice-move-scale is commensurate with symmetries:
	std::vector<matrix3<int>> sym = e.symm.getMatrices();
	for(const matrix3<int>& m: sym)
		for(int i=0; i<3; i++)
			for(int j=0; j<3; j++)
				if(m(i,j) && e.cntrl.lattMoveScale[i] != e.cntrl.lattMoveScale[j])
					die("latt-move-scale is not commensurate with symmetries:\n"
						"\t(Lattice vectors #%d and #%d are connected by symmetry,\n"
						"\tbut have different move scale factors %lg != %lg).\n",
						i, j, e.cntrl.lattMoveScale[i], e.cntrl.lattMoveScale[j]);

	//Check which lattice vectors can be altered:
	vector3<bool> isFixed, isTruncated = e.coulombParams.isTruncated();
	for(int k=0; k<3; k++)
		isFixed[k] = (e.cntrl.lattMoveScale[k]==0.) || isTruncated[k];

	//Create a orthonormal basis for strain commensurate with symmetries:
	//(k = 0..2 are the diagonal elements, k = 3..5 the symmetric off-diagonal pairs)
	for(int k=0; k<6; k++)
	{	//Initialize a basis element for arbitrary symmetric matrices:
		matrix3<int> s; //all zero:
		if(k<3) //diagonal strain
		{	s(k,k) = 1;
			if(isFixed[k]) continue; //strain alters fixed direction
		}
		else //off-diagonal strain
		{	int i=(k+1)%3;
			int j=(k+2)%3;
			s(i,j) = s(j,i) = 1;
			if(isFixed[i] || isFixed[j]) continue; //strain alters fixed direction
		}
		//Symmetrize:
		matrix3<int> sSym;
		for(const matrix3<int>& m: sym)
		{	matrix3<int> mInv = det(m) * adjugate(m); //since |det(m)| = 1
			sSym += mInv * s * m;
		}
		//Orthonormalize w.r.t previous basis elements:
		matrix3<> strain(sSym); //convert from integer to double matrix
		for(const matrix3<>& sPrev: strainBasis)
			strain -= sPrev * dot(sPrev, strain);
		double strainNorm = nrm2(strain);
		//NOTE(review): a norm is compared against symmThresholdSq (named as a squared threshold) -- confirm this is intended
		if(strainNorm < symmThresholdSq) continue; //linearly dependent
		strainBasis.push_back((1./strainNorm) * strain);
	}
	if(!strainBasis.size())
		die("All lattice-vectors are constrained by coulomb truncation and/or\n"
			"latt-move-scale: please disable lattice minimization.\n");

	//Print initialization status:
	e.latticeMinParams.nDim = strainBasis.size();
	logPrintf("Minimization of dimension %lu over strains spanned by:\n", strainBasis.size());
	for(const matrix3<>& s: strainBasis)
	{	s.print(globalLog, " %lg ");
		logPrintf("\n");
	}

	h = 1e-5;
}
/**
 * Follow the clustering regularization path by (sub)gradient descent on the
 * cluster representatives, joining clusters whose representatives come within
 * join_thresh of each other, until a single cluster remains, the target
 * number of clusters is hit, or the iteration budget is exhausted twice.
 *
 * Each cluster stores its current value only in the row of its FIRST point;
 * the other rows are filled in right before a solution is recorded.
 *
 * Work buffers are owned by std::vector so that every exit path releases
 * them (the previous version released new[] storage with scalar delete).
 *
 * @return heap-allocated Results (created with new here and handed to the
 *         caller) holding the solutions recorded along the path.
 */
Results* join_clusters2_restart
(double *x,//array/matrix of data
 SymNoDiag *W,//lower triangle of weight matrix
 unsigned int Px,//problem size
 double lambda,//starting point in regularization path
 double join_thresh, //tolerance for equality of points
 double opt_thresh, //tolerance for optimality
 double lambda_factor,//increase of lambda after optimality
 double smooth,//smoothing parameter
 int maxit,
 int linesearch_freq,//how often to do a linesearch? if 0, never. if
		     //n>0, do n-1 linesearch steps for every
		     //decreasing step size step. set this to 2 if
		     //unsure.
 int linesearch_points,//how many points to check along the gradient
		       //direction. set to 10 if unsure.
 int check_splits,
 int target_cluster,
 int verbose
 ){
  unsigned int N = W->N;
  //W->print();
  double old_lambda=0;
  std::vector<int> rows,rowsj;
  std::vector<int>::iterator ri,rj;
  std::list< std::vector<int> > clusters;
  std::list< std::vector<int> >::iterator it,cj;
  unsigned int i,k,j;
  int tried_restart;
  //start with every point in its own cluster
  for(i=0;i<N;i++){
    rows.assign(1,i);
    clusters.push_back(rows);
  }
  //Vector-backed storage: released automatically on every return path.
  //The raw pointers below are non-owning aliases so that the helpers
  //taking double* can be called exactly as before.
  std::vector<double> old_alpha_store(N*Px);
  std::vector<double> alpha_store(N*Px);
  std::vector<double> xbar_store(N*Px);
  std::vector<double> dir_store(N*Px);
  double *old_alpha = old_alpha_store.data();
  double *alpha = alpha_store.data();
  double *xbar = xbar_store.data();
  double *dir = dir_store.data();
  for(i=0;i<N*Px;i++){
    alpha[i]=xbar[i]=x[i];
  }
  Matrix amat(alpha,N,Px),xmat(x,N,Px);
  SymNoDiag diffs(N);
  diffs.calc_diffs(clusters,amat,nrm2);
  //store initial trivial solution
  Results *results = new Results(N,Px,opt_thresh);
  if(target_cluster==0)results->add(alpha,0,0);
  double weight,diff,step;
  while(clusters.size()>1){
    double grad=opt_thresh;
    int iteration=1;
    tried_restart=0;
    //if we use the general (slower) algorithm for any weights, then
    //split the clusters to individual points
    if(check_splits){
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
	rows.assign(1,i);
	clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
	xbar[i]=x[i];
      }
    }
    while(grad>=opt_thresh){
      //first calc gradients
      grad = 0;
      for(it=clusters.begin();it!=clusters.end();it++){
	rows = *it;
	i = rows[0];
	//data-fit part of the direction for this cluster representative
	for(k=0;k<Px;k++){
	  dir[i+k*N] = xbar[i+k*N] - alpha[i+k*N];
	}
	//penalty part: pull towards every other cluster representative
	for(cj=clusters.begin();cj!=clusters.end();cj++){
	  if(it!=cj){
	    rowsj = *cj;
	    j=rowsj[0];
	    weight=0;
	    diff = *diffs(i,j);
	    if(diff!=0){
	      if(smooth!=0){
		diff *= diff; //now squared l2 norm
		diff += smooth; //add smoothing parameter under sqrt
		diff = sqrt(diff);//put sqrt back
	      }
	      //total weight between all points of the two clusters
	      for(ri=rows.begin();ri!=rows.end();ri++){
		for(rj=rowsj.begin();rj!=rowsj.end();rj++){
		  weight += W->getval(*ri,*rj);
		}
	      }
	      //weight *= lambda / diff / ((double)(N-1)) / ((double)rows.size());
	      weight *= lambda / diff / ((double)rows.size());
	      for(k=0;k<Px;k++){
		dir[i+k*N] += weight * (alpha[j+k*N]-alpha[i+k*N]);
	      }
	    }
	  }
	}
	grad += nrm2(Array(dir+i,N,Px));
      }
      //store this iteration
      //results->add(alpha,lambda,grad);
      //then take a step
      if(linesearch_freq==0 || (iteration % linesearch_freq)==0 ){
	//Decreasing step size
	//TDH and pierre 18 jan 2011 try sqrt dec step size
	step=1/((double)iteration);
	//step=1/sqrt((double)iteration);
	if(verbose>=2)printf("grad %f step %f it %d\n",grad,step,iteration);
	take_step(clusters,alpha,dir,N,Px,step);
      }else{
	double cost_here,cost_step;
	//map from cost to the step size that produced it (ordered by cost)
	std::map<double,double> cost_steps;
	std::map<double,double>::iterator step1,step2;
	for(i=0;i<N*Px;i++)old_alpha[i]=alpha[i];//copy alpha
	//compare current cost to cost after stepping in gradient direction
	cost_here=cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
	step = 0;
	cost_steps.insert(std::pair<double,double>(cost_here,0));
	//keep taking unit steps until the cost goes above the starting cost
	while(cost_step<=cost_here){
	  take_step(clusters,alpha,dir,N,Px,1);
	  step += 1;
	  diffs.calc_diffs(clusters,amat,nrm2);
	  cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
	  if(verbose>=2)
	    printf("cost %.10f step %f cost_here %f\n",cost_step,step,cost_here);
	  cost_steps.insert(std::pair<double,double>(cost_step,step));
	}
	//repeatedly try the midpoint of the two lowest-cost steps seen so far
	for(int cuts=0;cuts<linesearch_points;cuts++){
	  step1=step2=cost_steps.begin();
	  step2++;
	  step = (step1->second + step2->second)/2;
	  for(i=0;i<N*Px;i++){
	    alpha[i]=old_alpha[i];
	  }
	  take_step(clusters,alpha,dir,N,Px,step);
	  diffs.calc_diffs(clusters,amat,nrm2);
	  cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
	  if(verbose>=2)printf("cost %.10f step %f %d\n",cost_step,step,cuts);
	  cost_steps.insert(std::pair<double,double>(cost_step,step));
	}
	cost_steps.clear();
      }
      if(iteration++ > maxit){
	if(tried_restart){
	  printf("max iteration %d exit\n",maxit);
	  return results;
	}else{
	  if(verbose>=1)printf("max iterations, trying restart from x\n");
	  tried_restart=1;
	  iteration=1;
	  for(i=0;i<N*Px;i++)alpha[i]=x[i];
	}
      }
      //calculate differences
      diffs.calc_diffs(clusters,amat,nrm2);
      //check for joins
      JoinPair tojoin;
      while(dojoin(tojoin=check_clusters_thresh(&clusters,diffs,join_thresh))){
	//if(verbose>=1)
	//  printf("join: %d %d\n",tojoin.first->front(),tojoin.second->front());
	int ni=tojoin.first->size();
	int nj=tojoin.second->size();
	i=tojoin.first->front();
	j=tojoin.second->front();
	tojoin.first->insert(tojoin.first->end(),
			     tojoin.second->begin(),
			     tojoin.second->end());
	//merged representative is the size-weighted mean of the two
	for(k=0;k<Px;k++){
	  alpha[i+k*N] = (alpha[i+k*N]*ni + alpha[j+k*N]*nj)/(ni+nj);
	  xbar[i+k*N] = (xbar[i+k*N]*ni + xbar[j+k*N]*nj)/(ni+nj);
	}
	clusters.erase(tojoin.second);
	iteration=1;
	if(clusters.size()>1){
	  diffs.calc_diffs(clusters,amat,nrm2);//inefficient
	}else{
	  grad=0;//so we can escape from the last optimization loop
	}
      }
    }//while(grad>=opt_thresh)
    if(verbose>=1)
      printf("solution iteration %d lambda %f nclusters %d\n",
	     iteration,lambda,(int)clusters.size());
    if(target_cluster == 0){
      //for each cluster, there may be several points. we store the
      //alpha value just in the row of the first point. thus here we
      //copy this value to the other rows before copying the optimal
      //alpha to results.
      for(it=clusters.begin();it!=clusters.end();it++){
	rows = *it;
	if(rows.size()>1){
	  for(i=1;i<rows.size();i++){
	    for(k=0;k<Px;k++){
	      alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
	    }
	  }
	}
      }
      results->add(alpha,lambda,grad);
    }
    //haven't yet reached the target number of clusters, multiply
    //lambda by lambda_factor and continue along the path
    if((int)clusters.size()>target_cluster){
      old_lambda=lambda;
      lambda *= lambda_factor;
    }
    //if we have passed the target cluster number then decrease
    //lambda and go look for it!
    if((int)clusters.size()<target_cluster){
      if(verbose>=1){
	printf("missed target %d, going back for it\n",target_cluster);
      }
      lambda = (lambda+old_lambda)/2;
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
	rows.assign(1,i);
	clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
	xbar[i]=x[i];
      }
    }
    //this is the number of clusters that we were looking for,
    //save and quit!
    if((int)clusters.size()==target_cluster){
      for(it=clusters.begin();it!=clusters.end();it++){
	rows = *it;
	if(rows.size()>1){
	  for(i=1;i<rows.size();i++){
	    for(k=0;k<Px;k++){
	      alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
	    }
	  }
	}
      }
      results->add(alpha,lambda,grad);
      if(verbose>=1)printf("got target cluster %d exit\n",target_cluster);
      return results;
    }
  }
  //work buffers are released by the std::vector destructors
  return results;
}
//============================================================================= bool minres ( const CPPL::dsymatrix& A, CPPL::dcovector& x, const double& eps ) { const CPPL::dcovector y(x); CPPL::dcovector r(y); double beta2(nrm2(r)), beta3; double rho0(1.0), rho1(beta2), rho2; double rhop(0.0); double c0(0.0), c1(-1.0), c2; double s0(0.0), s1(0.0), s2; double f(beta2); CPPL::dcovector p1(x.l), p2(r/beta2), p3; CPPL::dcovector q0(x.l), q1(x.l), q2; x.zero(); p1.zero(); q0.zero(); q1.zero(); int itc(0); const int itmax(2*x.l); while( (fabs(f)>eps || fabs(damax(y-A*x))>eps) && itc<itmax){ std::cout << itc << " " << fabs(damax(y-A*x)) << std::endl; //std::cerr << "itc=" << itc << ", fabs(f)=" << fabs(f) << std::endl; CPPL::dcovector Ap2(A*p2), z; z =Ap2-beta2*p1; double alpha; alpha =Ap2%p2; p3 =z-alpha*p2; beta3 =nrm2(p3); p3 /=beta3; double d, h; d =(alpha-rhop*c0)*s1; h =beta2*s0; rhop =-beta2*c0*s1 -alpha*c1; rho2 =sqrt(pow(rhop,2)+pow(beta3,2)); c2 =rhop/rho2; s2 =beta3/rho2; CPPL::dcovector zp; zp =p2 -(h/rho0)*q0; q2 =zp -(d/rho1)*q1; double t; t =f*c2; f *=s2; x +=(t/rho2)*q2; beta2=beta3; rho0=rho1; rho1=rho2; c0=c1; c1=c2; s0=s1; s1=s2; swap(p1,p2); swap(p2,p3); swap(q0,q1); swap(q1,q2); itc++; } std::cerr << "itc=" << itc << " fabs(damax(y-A*x))=" << fabs(damax(y-A*x)) << std::endl; //std::cerr << "itc=" << itc << " fabs(f)=" << fabs(f) << std::endl; if(itc<itmax){ return 0; } else{ return 1; } }