Example #1
0
// Convenience overload: forwards the contents of an ArrayListV to the
// pointer-based nrm2(n, x, incx) overload using a unit stride.
TYPE nrm2( //
		const ArrayListV<TYPE>& ax  //[in] container whose elements are passed on
		) {
	const TYPE* data = ax.getPointer();
	int count = ax.size();
	return nrm2(count, data, 1);
}
Example #2
0
// Computes (sum_k |x[k*incx]|^p)^(1/p) over n strided elements of x.
//
// p == 1 is delegated to asum() and p == 2 to nrm2(). For any other p the sum
// is accumulated directly with pow(). Returns zero when n <= 0 or incx <= 0.
VALUE nrmp( //
		int n,        //number of elements to visit
		const VALUE* x,    //first element of the strided vector
		VALUE2 p,         //exponent of the norm
		int incx       //stride between consecutive elements (must be > 0)
		) {
	if (p == 1.0) {
		return asum(n, x, incx);
	}
	if (p == 2.0) {
		return nrm2(n, x, incx);
	}
	VALUE zero = 0.0e+0;
	VALUE norm = zero;
	if (n <= 0 || incx <= 0) {
		return norm;
	} else if (n == 1) {
		// A single element: the p-norm is just its magnitude.
		norm = Abs(x[0]);
	} else {
		// Visit all n elements. The previous bound "(n - 1) * incx" stopped one
		// element early and silently dropped the last entry from the sum.
		// Counting elements (k) separately from the offset (i) also avoids
		// forming the product n * incx.
		for (int k = 0, i = 0; k < n; ++k, i += incx) {
			if (x[i] != zero) {
				norm += pow(Abs(x[i]), p);
			}
		}
		norm = pow(norm, 1.0 / double(p));
	}
	return norm;
}  // << ------------------------------------------
//! Set up the transformation from a source basis (basisC at k-point kC) to a destination
//! basis (wrapped by basisDwrapper, at k-point kD).
//! The net integer transformation applied to reciprocal lattice coordinates is sym * invert * super.
//! The constructor asserts that sym preserves the metric of basisC, that invert is +1 or -1,
//! that super relates the two lattices, and that kC maps onto kD up to an integer offset.
//! It then builds the index map from each C basis function to its D counterpart
//! (uploaded to the GPU when GPU_ENABLED is defined) and the spinor rotation matrix.
ColumnBundleTransform::ColumnBundleTransform(const vector3<>& kC, const Basis& basisC, const vector3<>& kD,
	const ColumnBundleTransform::BasisWrapper& basisDwrapper, int nSpinor, const matrix3<int>& sym, int invert, const matrix3<int>& super)
: kC(kC), basisC(basisC), kD(kD), basisD(basisDwrapper.basis), nSpinor(nSpinor), invert(invert)
{
	//Check k-point transformation and determine offset
	const matrix3<>& metricC = basisC.gInfo->RTR;
	myassert(nrm2(metricC - (~sym)*metricC*sym) < symmThreshold * nrm2(metricC)); //check symmetry
	myassert(abs(invert) == 1); //check inversion
	myassert(nrm2(basisC.gInfo->R * super - basisD.gInfo->R) < symmThreshold * nrm2(basisD.gInfo->R)); //check supercell
	matrix3<int> affine = sym * invert * super; //net affine transformation
	double offsetErr;
	vector3<int> offset = round(kC * affine - kD, &offsetErr); //integer shift between transformed kC and kD
	myassert(offsetErr < symmThreshold); //transformed kC must differ from kD by an integer vector
	
	//Initialize index map:
	index.resize(basisC.nbasis);
	for(size_t n=0; n<basisC.nbasis; n++)
	{	const vector3<int>& iG_C = basisC.iGarr[n]; //C recip lattice coords
		vector3<int> iG_D = iG_C * affine + offset; //corresponding D recip lattice coords
		index[n] = basisDwrapper.table[dot(basisDwrapper.pitch, iG_D + basisDwrapper.iGbox)]; //use lookup table to get D index
	}
	myassert(*std::min_element(index.begin(), index.end()) >= 0); //make sure all entries were found
	#ifdef GPU_ENABLED
	//Keep a device copy of the index map and make it the preferred pointer:
	cudaMalloc(&indexGpu, sizeof(int)*index.size()); gpuErrorCheck();
	cudaMemcpy(indexGpu, index.data(), sizeof(int)*index.size(), cudaMemcpyHostToDevice); gpuErrorCheck();
	indexPref = indexGpu;
	#else
	indexPref = index.data();
	#endif
	
	//Initialize spinor transformation:
	switch(nSpinor)
	{	case 1: spinorRot = eye(1); break; //no spinor components to rotate
		case 2: 
		{	spinorRot = Symmetries::getSpinorRotation(~(basisC.gInfo->R * sym * inv(basisC.gInfo->R)));
			if(invert<0)
			{	//Inversion case: multiply by the 2x2 matrix [[0,1],[-1,0]] and conjugate the product
				matrix sInvert = zeroes(2,2);
				sInvert.set(0,1, 1.);
				sInvert.set(1,0, -1.);
				spinorRot = conj(sInvert * spinorRot);
			}
			break;
		}
		default: myassert(!"Invalid value for nSpinor");
	}
}
/*! build a quaternion from a vector v and an angle theta:
    v/(nrm2(v)+DBL_MIN)*sin(theta/2) and cos(theta/2) are handed to vr2q
    (DBL_MIN keeps the division defined for a zero-length v) */
inline dquater vt2q(const dcovec3& v, const double& theta)
{VERBOSE_REPORT;
  const double halfAngle = 0.5*theta;
  return vr2q( v/(nrm2(v)+DBL_MIN)*std::sin(halfAngle), std::cos(halfAngle) );
}
/*! inverse: conj(q) divided by the square of nrm2(q) */
inline dquater inv(const dquater& q)
{VERBOSE_REPORT;
  const double normSquared = pow(nrm2(q),2);
  return conj(q)/normSquared;
}
/***************************************
 *         Conjugate Gradient          *
 *   This function will do the CG      *
 *  algorithm without preconditioning. *
 *    For optimization you must not    *
 *        change the algorithm.        *
 ***************************************
 r(0)    = b - Ax(0)
 p(0)    = r(0)
 rho(0)    =  <r(0),r(0)>
 ***************************************
 for k=0,1,2,...,n-1
   q(k)      = A * p(k)
   dot_pq    = <p(k),q(k)>
   alpha     = rho(k) / dot_pq
   x(k+1)    = x(k) + alpha*p(k)
   r(k+1)    = r(k) - alpha*q(k)
   rho(k+1)  = <r(k+1), r(k+1)>
   check convergence ||r(k+1)||_2 / ||r(0)||_2 <= tolerance
   beta      = rho(k+1) / rho(k)
   p(k+1)    = r(k+1) + beta*p(k)
 ***************************************
 Parameters:
   n, nnz, maxNNZ         sizes forwarded to matvec(); the data region below
                          maps data/indices as n*maxNNZ entries and length as n
   data, indices, length  sparse matrix A, forwarded to matvec()
   b                      right-hand side (n entries)
   x                      in: initial guess, out: computed solution (n entries)
   sc                     in: maxIter, tolerance; out: iter, residual, timeMatvec
 If the initial residual is exactly zero, x is left untouched, sc->residual is
 set to 0 and sc->iter to 0.
***************************************/
void cg(const int n, const int nnz, const int maxNNZ, const floatType* data, const int* indices, const int* length, const floatType* b, floatType* x, struct SolverConfig* sc){

	floatType* r, *p, *q;
	floatType alpha, beta, rho, rho_old, dot_pq, bnrm2;
	int iter;
	int maxIter;
 	double timeMatvec_s;
 	double timeMatvec=0;
	int i;
	floatType temp;
	
	/* allocate memory */
	r = (floatType*)malloc(n * sizeof(floatType));
	p = (floatType*)malloc(n * sizeof(floatType));
	q = (floatType*)malloc(n * sizeof(floatType));
	
#pragma acc data copyin(data[0:n*maxNNZ], indices[0:n*maxNNZ], length[0:n], n, nnz, maxNNZ, b[0:n]) copy(x[0:n]) create(alpha, beta, r[0:n], p[0:n], q[0:n], i, temp) //should really also be copy(x[0:n]) but error: not found on device???
{
	DBGMAT("Start matrix A = ", n, nnz, maxNNZ, data, indices, length)
	DBGVEC("b = ", b, n);
	DBGVEC("x = ", x, n);

	/* r(0)    = b - Ax(0) */
	timeMatvec_s = getWTime();
	matvec(n, nnz, maxNNZ, data, indices, length, x, r);
//try inlining here
/*int i, j, k;
#pragma acc parallel loop present(data, indices, length, x)
	for (i = 0; i < n; i++) {
		r[i] = 0;
		for (j = 0; j < length[i]; j++) {
			k = j * n + i;
			r[i] += data[k] * x[indices[k]];
		}
	}*/
	timeMatvec += getWTime() - timeMatvec_s;
	xpay(b, -1.0, n, r);
	DBGVEC("r = b - Ax = ", r, n);
	

	/* Calculate initial residuum */
	nrm2(r, n, &bnrm2);
	if(bnrm2 == 0.0){
		/* The initial guess already solves the system. Inverting bnrm2 would
		 * give infinity, the first alpha would be 0/0 and x would be filled
		 * with NaN, so report convergence and skip the iteration entirely.
		 * (No early return: we must not jump out of the acc data region.) */
		sc->residual = 0.0;
		maxIter = 0;
	}else{
		/* Store the reciprocal so the loop can normalize by multiplication */
		bnrm2 = 1.0 /bnrm2;
		maxIter = sc->maxIter;
	}

	/* p(0)    = r(0) */
	memcpy(p, r, n*sizeof(floatType));
	DBGVEC("p = r = ", p, n);

	/* rho(0)    =  <r(0),r(0)> */
	vectorDot(r, r, n, &rho);
	printf("rho_0=%e\n", rho);

	for(iter = 0; iter < maxIter; iter++){
		DBGMSG("=============== Iteration %d ======================\n", iter);
		/* q(k)      = A * p(k) */
		timeMatvec_s = getWTime();
		matvec(n, nnz, maxNNZ, data, indices, length, p, q);
		timeMatvec += getWTime() - timeMatvec_s;
		DBGVEC("q = A * p= ", q, n);

		/* dot_pq    = <p(k),q(k)> */
		vectorDot(p, q, n, &dot_pq);
		DBGSCA("dot_pq = <p, q> = ", dot_pq);

		/* alpha     = rho(k) / dot_pq */
		alpha = rho / dot_pq;
		DBGSCA("alpha = rho / dot_pq = ", alpha);

		/* x(k+1)    = x(k) + alpha*p(k) */
		axpy(alpha, p, n, x);
#pragma acc update host(x[0:n])
		DBGVEC("x = x + alpha * p= ", x, n);

		/* r(k+1)    = r(k) - alpha*q(k) */
		axpy(-alpha, q, n, r);
		DBGVEC("r = r - alpha * q= ", r, n);


		rho_old = rho;
		DBGSCA("rho_old = rho = ", rho_old);


		/* rho(k+1)  = <r(k+1), r(k+1)> */
		vectorDot(r, r, n, &rho);
		DBGSCA("rho = <r, r> = ", rho);

		/* Normalize the residual with initial one */
		sc->residual= sqrt(rho) * bnrm2;


   	
		/* Check convergence: stop as soon as the residual,
		 * relative to the initial one, is no larger than
		 * the tolerance stored in the solver configuration
		 * (sc->tolerance); the solution vector is then
		 * good enough. */
		printf("res_%d=%e\n", iter+1, sc->residual);
		if(sc->residual <= sc->tolerance)
			break;


		/* beta      = rho(k+1) / rho(k) */
		beta = rho / rho_old;
		DBGSCA("beta = rho / rho_old= ", beta);

		/* p(k+1)    = r(k+1) + beta*p(k) */
		xpay(r, beta, n, p);
		DBGVEC("p = r + beta * p> = ", p, n);

	}

	/* Store the number of iterations and the 
	 * time for the sparse matrix vector
	 * product which is the most expensive 
	 * function in the whole CG algorithm. */
	sc->iter = iter;
	sc->timeMatvec = timeMatvec;

	/* Clean up */
	free(r);
	free(p);
	free(q);
}//end of data region
}
//! Prepare the phonon calculation.
//! In order, this routine:
//!  - parses the input twice (once into the unit cell e, once silently into the supercell template eSupTemplate),
//!  - validates the supercell specification, the fftbox and the k-point mesh,
//!  - sets up and converges (or evaluates at fixed state) the unit cell,
//!  - determines the number of bands to use per unit cell based on Fcut,
//!  - broadcasts the unit cell state to all processes,
//!  - constructs the supercell grid, atoms and symmetries,
//!  - picks an orthogonal displacement basis, lists all modes and reduces them to irreducible perturbations,
//!  - computes the rotations of unit cell states under supercell symmetries (stateRot).
//! @param printDefaults forwarded to parse() for the unit cell input
void Phonon::setup(bool printDefaults)
{
	//Parse input to initialize unit cell:
	parse(input, e, printDefaults);
	logSuspend();
	parse(input, eSupTemplate); //silently create a copy by re-parsing input (Everything is not trivially copyable)
	logResume();
	
	//Ensure phonon command specified:
	if(!sup.length())
		die("phonon supercell must be specified using the phonon command.\n");
	if(!e.gInfo.S.length_squared())
		die("Manual fftbox setting required for phonon. If supercell grid\n"
			"initialization fails, specify slightly larger manual fftbox.\n");
	//Check kpoint and supercell compatibility:
	if(e.eInfo.qnums.size()>1 || e.eInfo.qnums[0].k.length_squared())
		die("phonon requires a Gamma-centered uniform kpoint mesh.\n");
	for(int j=0; j<3; j++)
	{	if(!sup[j] || e.eInfo.kfold[j] % sup[j])
		{	die("kpoint folding %d is not a multiple of supercell count %d for lattice direction %d.\n",
				e.eInfo.kfold[j], sup[j], j);
		}
		eSupTemplate.eInfo.kfold[j] = e.eInfo.kfold[j] / sup[j]; //supercell gets the correspondingly reduced folding
	}
	
	logPrintf("########### Unit cell calculation #############\n");
	//Replace all atom constraints with a zero-moveScale constraint of type None:
	SpeciesInfo::Constraint constraintFull;
	constraintFull.moveScale = 0;
	constraintFull.type = SpeciesInfo::Constraint::None;
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
		e.iInfo.species[sp]->constraints.assign(e.iInfo.species[sp]->atpos.size(), constraintFull);
	e.setup();
	if(!e.coulombParams.supercell) e.updateSupercell(true); //force supercell generation

	nSpins = e.eInfo.spinType==SpinZ ? 2 : 1;
	nSpinor = e.eInfo.spinorLength();

	//Initialize state of unit cell:
	if(e.cntrl.dumpOnly)
	{	//Single energy calculation so that all dependent quantities have been initialized:
		logPrintf("\n----------- Energy evaluation at fixed state -------------\n"); logFlush();
		e.eVars.elecEnergyAndGrad(e.ener, 0, 0, true);
	}
	else elecFluidMinimize(e);
	logPrintf("# Energy components:\n"); e.ener.print(); logPrintf("\n");

	//Determine optimum number of bands for supercell calculation:
	nBandsOpt = 0;
	for(int q=e.eInfo.qStart; q<e.eInfo.qStop; q++)
	{	//Position of Fcut in the fillings, searched with descending order (std::greater):
		int nBands_q = std::upper_bound(e.eVars.F[q].begin(), e.eVars.F[q].end(), Fcut, std::greater<double>()) - e.eVars.F[q].begin();
		nBandsOpt = std::max(nBandsOpt, nBands_q);
	}
	mpiUtil->allReduce(nBandsOpt, MPIUtil::ReduceMax);
	logPrintf("Fcut=%lg reduced nBands from %d to %d per unit cell.\n", Fcut, e.eInfo.nBands, nBandsOpt);

	//Make unit cell state available on all processes 
	//(since MPI division of qSup and q are different and independent of the map)
	for(int q=0; q<e.eInfo.nStates; q++)
	{	//Allocate:
		if(!e.eInfo.isMine(q))
		{	e.eVars.C[q].init(e.eInfo.nBands, e.basis[q].nbasis * e.eInfo.spinorLength(), &e.basis[q], &e.eInfo.qnums[q]);
			e.eVars.F[q].resize(e.eInfo.nBands);
			e.eVars.Hsub_eigs[q].resize(e.eInfo.nBands);
			if(e.eInfo.fillingsUpdate==ElecInfo::FermiFillingsAux)
				e.eVars.B[q].init(e.eInfo.nBands, e.eInfo.nBands);
		}
		//Broadcast from owner:
		int qSrc = e.eInfo.whose(q);
		e.eVars.C[q].bcast(qSrc);
		e.eVars.F[q].bcast(qSrc);
		e.eVars.Hsub_eigs[q].bcast(qSrc);
		if(e.eInfo.fillingsUpdate==ElecInfo::FermiFillingsAux)
			e.eVars.B[q].bcast(qSrc);
	}

	logPrintf("\n------- Configuring supercell and perturbation modes -------\n");
	
	//Grid:
	eSupTemplate.gInfo.S = Diag(sup) * e.gInfo.S; //ensure exact supercell
	eSupTemplate.gInfo.R = e.gInfo.R * Diag(sup);
	prodSup = sup[0] * sup[1] * sup[2]; //number of unit cells in the supercell
	
	//Replicate atoms (and related properties):
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
	{	const SpeciesInfo& spIn = *(e.iInfo.species[sp]);
		SpeciesInfo& spOut = *(eSupTemplate.iInfo.species[sp]);
		spOut.atpos.clear();
		spOut.initialMagneticMoments.clear();
		matrix3<> invSup = inv(Diag(vector3<>(sup))); //rescales unit cell coordinates to supercell coordinates
		vector3<int> iR;
		for(iR[0]=0; iR[0]<sup[0]; iR[0]++)
		for(iR[1]=0; iR[1]<sup[1]; iR[1]++)
		for(iR[2]=0; iR[2]<sup[2]; iR[2]++)
		{	for(vector3<> pos: spIn.atpos)
				spOut.atpos.push_back(invSup * (pos + iR));
			for(vector3<> M: spIn.initialMagneticMoments)
				spOut.initialMagneticMoments.push_back(M); //needed only to determine supercell symmetries
		}
		spOut.constraints.assign(spOut.atpos.size(), constraintFull);
	}
	
	//Supercell symmetries:
	eSupTemplate.symm.setup(eSupTemplate);
	const std::vector< matrix3<int> >& symSup = eSupTemplate.symm.getMatrices();
	symSupCart.clear();
	eSupTemplate.gInfo.invR = inv(eSupTemplate.gInfo.R);
	for(const matrix3<int>& m: symSup)
		symSupCart.push_back(eSupTemplate.gInfo.R * m * eSupTemplate.gInfo.invR); //similarity transform by R and its inverse
	
	//Pick maximally symmetric orthogonal basis:
	logPrintf("\nFinding maximally-symmetric orthogonal basis for displacements:\n");
	std::vector< vector3<> > dirBasis;
	{	std::multimap<int, vector3<> > dirList; //directions indexed by their stabilizer group cardinality
		vector3<int> iR;
		for(iR[0]=0; iR[0]<=+1; iR[0]++)
		for(iR[1]=-1; iR[1]<=+1; iR[1]++)
		for(iR[2]=-1; iR[2]<=+1; iR[2]++)
			if(iR.length_squared())
			{	//Try low-order lattice vector linear combination:
				vector3<> n = eSupTemplate.gInfo.R * iR; n *= (1./n.length());
				dirList.insert(std::make_pair(nStabilizer(n, symSupCart), n));
				//Try low-order reciprocal lattice vector linear combination:
				n = iR * eSupTemplate.gInfo.invR; n *= (1./n.length());
				dirList.insert(std::make_pair(nStabilizer(n, symSupCart), n));
			}
		dirBasis.push_back(dirList.rbegin()->second); //last multimap entry has the largest key
		//Pick second direction orthogonal to first:
		std::multimap<int, vector3<> > dirList2;
		for(auto entry: dirList)
		{	vector3<> n = entry.second;
			n -= dot(n, dirBasis[0]) * dirBasis[0]; //remove component along first direction
			if(n.length_squared() < symmThresholdSq) continue; //skip directions parallel to the first
			n *= (1./n.length());
			dirList2.insert(std::make_pair(nStabilizer(n, symSupCart), n));
		}
		dirBasis.push_back(dirList2.rbegin()->second);
		dirBasis.push_back(cross(dirBasis[0], dirBasis[1])); //third direction constrained by orthogonality
	}
	for(const vector3<>& n: dirBasis)
		logPrintf(" [ %+lf %+lf %+lf ] |Stabilizer|: %d\n", n[0], n[1], n[2], nStabilizer(n,symSupCart));
	
	//List all modes:
	modes.clear();
	for(size_t sp=0; sp<e.iInfo.species.size(); sp++)
		for(size_t at=0; at<e.iInfo.species[sp]->atpos.size(); at++) //only need to move atoms in first unit cell
			for(int iDir=0; iDir<3; iDir++)
			{	Mode mode;
				mode.sp = sp;
				mode.at = at;
				mode.dir[iDir] = 1.;
				modes.push_back(mode);
			}

	//Find irreducible modes:
	perturbations.clear();
	for(unsigned sp=0; sp<e.iInfo.species.size(); sp++)
	{	int nAtoms = e.iInfo.species[sp]->atpos.size();
		int nPert = nAtoms * dirBasis.size();
		//generate all perturbations first:
		std::vector<Perturbation> pertSp(nPert); //perturbations of this species
		std::vector<matrix> proj(nPert); //projection operator into subspace spanned by star of current perturbation
		matrix projTot; //sum of all proj[]; asserted below to equal the identity
		const auto& atomMap = eSupTemplate.symm.getAtomMap()[sp];
		for(int iPert=0; iPert<nPert; iPert++)
		{	pertSp[iPert].sp = sp;
			pertSp[iPert].at = iPert / dirBasis.size();
			pertSp[iPert].dir = dirBasis[iPert % dirBasis.size()];
			pertSp[iPert].weight = 1./symSupCart.size();
			for(unsigned iSym=0; iSym<symSupCart.size(); iSym++)
			{	int at = atomMap[pertSp[iPert].at][iSym] % nAtoms; //map back to first cell
				vector3<> dir = symSupCart[iSym] * pertSp[iPert].dir;
				matrix nHat = zeroes(nPert,1);
				for(int iDir=0; iDir<3; iDir++)
					nHat.set(at*3+iDir,0, dir[iDir]);
				proj[iPert] += pertSp[iPert].weight * nHat * dagger(nHat);
			}
			projTot += proj[iPert];
		}
		myassert(nrm2(projTot - eye(nPert)) < symmThreshold);
		//only select perturbations with distinct subspace projections:
		std::vector<bool> irred(nPert, true); //whether each perturbation is in irreducible set
		for(int iPert=0; iPert<nPert; iPert++)
		{	for(int jPert=0; jPert<iPert; jPert++)
				if(irred[jPert] && nrm2(proj[iPert]-proj[jPert])<symmThreshold)
				{	pertSp[jPert].weight += pertSp[iPert].weight; //send weight of current mode to its image in irreducible set
					irred[iPert] = false; //this mode will be accounted for upon symmetrization
					break;
				}
		}
		for(int iPert=0; iPert<nPert; iPert++)
			if(irred[iPert])
				perturbations.push_back(pertSp[iPert]);
	}
	logPrintf("\n%d perturbations of the unit cell reduced to %d under symmetries:\n", int(modes.size()), int(perturbations.size()));
	for(const Perturbation& pert: perturbations)
		logPrintf("%s %d  [ %+lf %+lf %+lf ] %lf\n", e.iInfo.species[pert.sp]->name.c_str(),
			pert.at, pert.dir[0], pert.dir[1], pert.dir[2], pert.weight*symSupCart.size());
	
	//Determine wavefunction unitary rotations:
	logPrintf("\nCalculating unitary rotations of unit cell states under symmetries:\n");
	stateRot.resize(nSpins);
	double unitarityErr = 0.; //accumulates squared errors; converted to RMS at the end
	for(int iSpin=0; iSpin<nSpins; iSpin++)
	{	//Find states involved in the supercell Gamma-point:
		struct Kpoint : public Supercell::KmeshTransform
		{	vector3<> k; //also store k-point for convenience (KmeshTransform doesn't have it)
		};
		std::vector<Kpoint> kpoints; kpoints.reserve(prodSup);
		const Supercell& supercell = *(e.coulombParams.supercell);
		for(unsigned ik=0; ik<supercell.kmesh.size(); ik++)
		{	double kSupErr; round(matrix3<>(Diag(sup)) * supercell.kmesh[ik], &kSupErr); //only the rounding error is used
			if(kSupErr < symmThreshold) //maps to Gamma point
			{	Kpoint kpoint;
				(Supercell::KmeshTransform&)kpoint = supercell.kmeshTransform[ik]; //copy base class
				kpoint.k = supercell.kmesh[ik];
				kpoint.iReduced += iSpin*(e.eInfo.nStates/nSpins); //point to source k-point with appropriate spin
				kpoints.push_back(kpoint);
			}
		}
		myassert(int(kpoints.size()) == prodSup);
		//Initialize basis and qnum for these states:
		std::vector<QuantumNumber> qnums(prodSup);
		std::vector<Basis> basis(prodSup);
		logSuspend();
		for(int ik=0; ik<prodSup; ik++)
		{	qnums[ik].k = kpoints[ik].k;
			qnums[ik].spin = (nSpins==1 ? 0 : (iSpin ? +1 : -1));
			qnums[ik].weight = 1./prodSup;
			basis[ik].setup(e.gInfo, e.iInfo, e.cntrl.Ecut, kpoints[ik].k);
		}
		logResume();
		//Get wavefunctions for all these k-points:
		#define whose_ik(ik) (((ik) * mpiUtil->nProcesses())/prodSup) //local MPI division
		std::vector<ColumnBundle> C(prodSup);
		std::vector<std::shared_ptr<ColumnBundleTransform::BasisWrapper> > basisWrapper(prodSup);
		auto sym = e.symm.getMatrices(); //unit cell symmetries
		for(int ik=0; ik<prodSup; ik++)
		{	C[ik].init(e.eInfo.nBands, basis[ik].nbasis*nSpinor, &basis[ik], &qnums[ik], isGpuEnabled());
			if(whose_ik(ik) == mpiUtil->iProcess())
			{	int q = kpoints[ik].iReduced;
				C[ik].zero();
				basisWrapper[ik] = std::make_shared<ColumnBundleTransform::BasisWrapper>(basis[ik]);
				//Scatter the reduced state q onto k-point ik using the recorded symmetry and inversion:
				ColumnBundleTransform(e.eInfo.qnums[q].k, e.basis[q], qnums[ik].k, *(basisWrapper[ik]),
					nSpinor, sym[kpoints[ik].iSym], kpoints[ik].invert).scatterAxpy(1., e.eVars.C[q], C[ik],0,1);
			}
		}
		for(int ik=0; ik<prodSup; ik++) C[ik].bcast(whose_ik(ik)); //make available on all processes
		//Determine max eigenvalue:
		int nBands = e.eInfo.nBands;
		double Emax = -INFINITY;
		for(int q=e.eInfo.qStart; q<e.eInfo.qStop; q++)
			Emax = std::max(Emax, e.eVars.Hsub_eigs[q].back());
		mpiUtil->allReduce(Emax, MPIUtil::MPIUtil::ReduceMax);
		double EmaxValid = +INFINITY; //stays infinite unless some rotation has an incomplete unitary subspace
		//Loop over supercell symmetry operations:
		PeriodicLookup<QuantumNumber> plook(qnums, e.gInfo.GGT);
		stateRot[iSpin].resize(symSupCart.size());
		for(size_t iSym=0; iSym<symSupCart.size(); iSym++)
		{	matrix3<> symUnitTmp = e.gInfo.invR * symSupCart[iSym] * e.gInfo.R; //in unit cell lattice coordinates
			#define SymmErrMsg \
				"Supercell symmetries do not map unit cell k-point mesh onto itself.\n" \
				"This implies that the supercell is more symmetric than the unit cell!\n" \
				"Please check to make sure that you have used the minimal unit cell.\n\n"
			//Round to an integer matrix, failing if any entry is not integral within symmThreshold:
			matrix3<int> symUnit;
			for(int j1=0; j1<3; j1++)
				for(int j2=0; j2<3; j2++)
				{	symUnit(j1,j2) = round(symUnitTmp(j1,j2));
					if(fabs(symUnit(j1,j2) - symUnitTmp(j1,j2)) > symmThreshold)
						die(SymmErrMsg)
				}
			//Find image kpoints under rotation: (do this for all k-points so that all processes exit together if necessary)
			std::vector<int> ikRot(prodSup);
			for(int ik=0; ik<prodSup; ik++)
			{	size_t ikRotCur = plook.find(qnums[ik].k * symUnit);
				if(ikRotCur==string::npos) die(SymmErrMsg)
				ikRot[ik] = ikRotCur;
			}
			#undef SymmErrMsg
			//Calculate unitary transformation matrix:
			stateRot[iSpin][iSym].init(prodSup, nBands);
			for(int ik=0; ik<prodSup; ik++)
				if(whose_ik(ikRot[ik]) == mpiUtil->iProcess()) //MPI division by target k-point
				{	ColumnBundle Crot = C[ikRot[ik]].similar();
					Crot.zero();
					ColumnBundleTransform(qnums[ik].k, basis[ik], qnums[ikRot[ik]].k, *(basisWrapper[ikRot[ik]]),
						nSpinor, symUnit, +1).scatterAxpy(1., C[ik], Crot,0,1);
					matrix Urot = Crot ^ O(C[ikRot[ik]]); //will be unitary if Crot is a strict unitary rotation of C[ikRot[ik]]
					//Check maximal subspace that is unitary: (remainder must be incomplete degenerate subspace)
					int nBandsValid = nBands;
					while(nBandsValid && !isUnitary(Urot(0,nBandsValid, 0,nBandsValid)))
						nBandsValid--;
					if(nBandsValid<nBands)
					{	//Update energy range of validity:
						EmaxValid = std::min(EmaxValid, e.eVars.Hsub_eigs[kpoints[ik].iReduced][nBandsValid]);
						//Make valid subspace exactly unitary:
						matrix UrotSub = Urot(0,nBandsValid, 0,nBandsValid);
						matrix UrotOverlap = dagger(UrotSub) * UrotSub;
						UrotSub = UrotSub * invsqrt(UrotOverlap); //make exactly unitary
						unitarityErr += std::pow(nrm2(UrotOverlap - eye(nBandsValid)), 2);
						//Zero out invalid subspace:
						Urot.zero();
						Urot.set(0,nBandsValid, 0,nBandsValid, UrotSub);
					}
					stateRot[iSpin][iSym].set(ik, ikRot[ik], Urot);
				}
			stateRot[iSpin][iSym].allReduce(); //combine the blocks computed on different processes
		}
		#undef whose_ik
		mpiUtil->allReduce(EmaxValid, MPIUtil::ReduceMin);
		if(nSpins>1) logPrintf("\tSpin %+d: ", iSpin ? +1 : -1);  else logPrintf("\t");
		logPrintf("Matrix elements valid for ");
		if(std::isfinite(EmaxValid)) logPrintf("E < %+.6lf (Emax = %+.6lf) due to incomplete degenerate subspaces.\n", EmaxValid, Emax);
		else logPrintf("all available states (all degenerate subspaces are complete).\n");
	}
	mpiUtil->allReduce(unitarityErr, MPIUtil::ReduceSum);
	unitarityErr = sqrt(unitarityErr / (nSpins * prodSup * symSupCart.size())); //root mean square over spins, k-points and symmetries
	logPrintf("\tRMS unitarity error in valid subspaces: %le\n", unitarityErr);
}
//! Check whether U*dagger(U) agrees with the identity of size U.nCols() to within symmThreshold (measured by nrm2)
inline bool isUnitary(const matrix& U)
{	const double deviation = nrm2(U*dagger(U) - eye(U.nCols()));
	return deviation < symmThreshold;
}
Example #9
0
/*! solve A*x=b with restarted GMRES

  Each outer pass builds m=10 Krylov vectors by Gram-Schmidt orthogonalization,
  updates H and s with the helpers make_rotator()/rotate(), back-substitutes
  for y and updates x in place. The preconditioner Minv is currently the
  identity. Convergence is measured by fabs(damax(r)) of the recomputed
  residual r=b-A*x.

  \param A   system matrix
  \param b   right-hand side
  \param x   in: initial guess, out: latest iterate
  \param eps convergence threshold compared against fabs(damax(r))
  \return 0 if converged (or the initial residual is below DBL_MIN),
          1 if the residual became non-finite, grew beyond 1e3 times its
          initial value, or the outer iteration limit was reached

  NOTE(review): H(i+1,i) is used as a divisor without a zero check, so an
  exact breakdown of the Krylov sequence produces NaN (reported as failure).
*/
int32_t gmres
(
 const CPPL::dgsmatrix& A,
 const CPPL::dcovector& b,
 CPPL::dcovector& x,
 const double& eps
 )
{
  ///////////////////////////////////////////////
  //////////////// preconditioner ///////////////
  ///////////////////////////////////////////////
  CPPL::dgbmatrix Minv(x.l, x.l, 0, 0);
  
  //////// no precondition ////////
  Minv.identity();
  
  ///////////////////////////////////////////////
  ///////////////// mid values //////////////////
  ///////////////////////////////////////////////
  long m(10);//restart number
  CPPL::dcovector r(b-A*x);
  CPPL::dcovector s(m+1), co(m+1), si(m+1), w;
  std::vector<CPPL::dcovector> v(m+1);
  CPPL::dgematrix H(m+1,m);
  //H.zero();
  //co.zero();
  //si.zero();
  //s.zero();
  
  //////// norm ////////
  double norm_r;
  const double norm_r_ini(fabs(damax(r)));
  std::cerr << "[NOTE]@gmres: norm_r_ini=" << norm_r_ini << ", eps=" << eps<< std::endl;
  if( norm_r_ini<DBL_MIN ){
    std::cerr << "[NOTE]@gmres: already converged. v(^^)" << std::endl;
    return 0;
  }
  
  ///////////////////////////////////////////////
  //////////////////// loop /////////////////////
  ///////////////////////////////////////////////
  int itc(1);
  //int itmax(int(2.1*x.l));
  int itmax(int(1.1*x.l));
  //int itmax(int(0.6*x.l));
  do{
    std::cerr << "** itc=" << itc << " ********************************************" << std::endl;
    //////// 0 ////////
    v[0] =r/nrm2(r);
    s.zero();
    s(0) =nrm2(r);
    
    for(long i=0; i<m; i++){
      //std::cerr << "++++ i=" << i << " ++++" << std::endl;
      w =A*v[i];
      w =Minv*w;
      //// orthogonalize w against all previous basis vectors ////
      for(long k=0; k<i+1; k++){
        H(k,i) =w%v[k];
        w -=H(k,i)*v[k];
      }
      H(i+1,i) =nrm2(w);
      v[i+1] =w/H(i+1,i);
      
      //// J,s ////
      //apply the rotations of the earlier columns to the new column
      for(long k=0; k<i; k++){
        rotate(H(k,i), H(k+1,i), co(k), si(k));
      }
      make_rotator( H(i,i), H(i+1,i), co(i), si(i) );
      //std::cerr << "co = " << t(co) << std::endl; std::cerr << "si = " << t(si) << std::endl;
      rotate( H(i,i), H(i+1,i), co(i), si(i) );//necessary
      //std::cerr << "H =\n" << H << std::endl;
      rotate( s(i), s(i+1), co(i), si(i) );
      //std::cerr << "s = " << t(s) << std::endl;
    }
    //for(long i=0; i<m+1; i++){ for(long j=i+1; j<m+1; j++){ std::cerr << "vv = " << v[i]%v[j] << std::endl; } }// v check
    //std::cerr << "H =\n" << H << std::endl;
    //std::cerr << "s =" << t(s) << std::endl;
    //for(long i=0; i<m+1; i++){ std::cerr << "v["<<i<<"] =" << t(v[i]) << std::flush; }
    
    //////// y ////////
    //back substitution using the upper triangle of H
    CPPL::dcovector y(s);
    for(long i=m-1; i>=0; i--){
      y(i) /= H(i,i);
      for(long j=i-1; j>=0; j--){
        y(j) -= H(j,i) * y(i);
      }
    }
    //std::cerr << "H*y = " << t(H*y) << std::endl;    
    //std::cerr << "s   = " << t(s) << std::endl;
    //std::cerr << "y = " << t(s) << std::endl;
    
    //////// update ////////
    for(long i=0; i<m; i++){
      x += v[i] * y(i);
    }
    //std::cerr << "x = " << t(x) << std::endl;
    
    //////// residual ////////
    r =b-A*x;
    r =Minv*r;
    //std::cerr << "r = " << t(r) << std::endl;
    
    //////// convergence check ////////
    norm_r =fabs(damax(r));
    std::cerr << "norm_r = " << norm_r << std::endl;
    //A single isfinite test rejects both NaN and infinity. The former
    //std::isnormal test also rejected an exactly zero (or subnormal)
    //residual, reporting a perfect solution as a failure.
    if( !std::isfinite(norm_r) ){ break; }//failed
    if( norm_r>1e3*norm_r_ini ){ break; }//failed (getting so worse)
    if( norm_r<=eps ){//r satisfied
      std::cerr << "[NOTE]@gmres: converged. v(^^)  itc=" << itc << "/" << itmax << ", norm=" << norm_r << std::endl;
      return 0;
    }
  }while(++itc<itmax);
  
  //////// failed ////////
  std::cerr << "[NOTE]@gmres: itc=" << itc << ", norm=" << norm_r << ", r_satisfied=" << (norm_r<=eps) << std::endl;
  std::cerr << "[NOTE]@gmres: failed to converge. orz" << std::endl;
  return 1;
}
Example #10
0
// Rescales x in place by 1/nrm2(x).
// A vector whose norm is exactly zero is left untouched: dividing by zero
// would yield an infinite factor and scale() would fill x with NaN.
void
KPMLinalg::normalize( my::scalar* x )
{
	const my::real length=nrm2(x);
	if( length==0 ) return;
	const my::real norm=1./length;
	scale(norm,x);
}
Example #11
0
      // Builds a new Quaternion from _real and the negated _imaginary, both
      // multiplied by the reciprocal of nrm2().
      Quaternion inverse() const {
	const double reciprocal = 1.0 / nrm2();
	return Quaternion(_real * reciprocal, - _imaginary * reciprocal);
      }
Example #12
0
      // Square root of nrm2().
      double nrm() const {
	const double squared = nrm2();
	return std::sqrt(squared);
      }
Example #13
0
      structmass1 += mass;
      COMVel1 += Sim.particleList[ID].getVelocity() * mass;
      COMPos1 += Sim.particleList[ID].getPosition() * mass;
    }
    
  BOOST_FOREACH(const size_t& ID, range2)
    {
      double mass = Sim.dynamics.getSpecies(Sim.particleList[ID]).getMass(ID);
      structmass2 += mass;
      COMVel2 += Sim.particleList[ID].getVelocity() * mass;	
      COMPos2 += Sim.particleList[ID].getPosition() * mass;
    }
    
  COMVel1 /= structmass1;
  COMVel2 /= structmass2;

  COMPos1 /= structmass1;
  COMPos2 /= structmass2;

  rij = COMPos1 - COMPos2;

  vij = COMVel1 - COMVel2;

  Sim.dynamics.BCs().applyBC(rij, vij);

  rvdot = (rij | vij);

  r2 = rij.nrm2();
  v2 = vij.nrm2();
}
Example #14
0
/* Returns nrm2 of mA evaluated between addId(-1, mA) and addId(1, mA).
 * The second addId call is meant to undo the first, so mA should come back
 * unchanged (presumably addId adds a multiple of the identity; confirm
 * against its definition). */
double minusIdNrm2(Mat mA) {
  addId(-1, mA); // temporarily shift mA instead of building a separate matrix
  const double shiftedNorm = nrm2(mA);
  addId(1, mA); // undo the shift
  return shiftedNorm;
}
Example #15
0
/*
 * Test driver.
 *
 * Initializes MPI, parses the command-line options, obtains the local problem
 * from generate(), and then solves it in one of two ways:
 *   - more than one MPI process: through an HpddmSchwarz object, optionally
 *     with a coarse operator when "schwarz_coarse_correction" is set;
 *   - a single MPI process: through HpddmSubdomainNumfact/HpddmSubdomainSolve.
 * In both cases two numbers are printed per right-hand side as
 * " --- residual = a / b".
 *
 * Returns 0 on success and 1 when a check fails: the ratio of the two printed
 * numbers is too large, or (parallel branch only) the iteration count returned
 * by HpddmSolve is too high.
 */
int main(int argc, char** argv) {
    MPI_Init(&argc, &argv);
    /*# Init #*/
    int rankWorld, sizeWorld;
    MPI_Comm_size(MPI_COMM_WORLD, &sizeWorld);
    MPI_Comm_rank(MPI_COMM_WORLD, &rankWorld);
    const HpddmOption* const opt = HpddmOptionGet();
    HpddmOptionParse(opt, argc, argv, rankWorld == 0);
    {
        /* Application-specific options; val/desc are reused for the second call, which only reads the first two entries. */
        char* val[4] = { "Nx=<100>", "Ny=<100>", "overlap=<1>", "generate_random_rhs=<0>" };
        char* desc[4] = { "Number of grid points in the x-direction.", "Number of grid points in the y-direction.", "Number of grid points in the overlap.", "Number of generated random right-hand sides." };
        HpddmOptionParseInts(opt, argc, argv, 4, val, desc);
        val[0] = "symmetric_csr=(0|1)"; desc[0] = "Assemble symmetric matrices.";
        val[1] = "nonuniform=(0|1)"; desc[1] = "Use a different number of eigenpairs to compute on each subdomain.";
        HpddmOptionParseArgs(opt, argc, argv, 2, val, desc);
    }
    int sizes[8];
    int* connectivity[8];
    int o[8];
    int neighbors = 0;
    HpddmMatrixCSR* Mat, *MatNeumann = NULL;
    K* f, *sol;
    underlying_type* d;
    int ndof;
    /* generate() fills every output argument below; d, f and sol are released with free() at the end of main. */
    generate(rankWorld, sizeWorld, &neighbors, o, sizes, connectivity, &ndof, &Mat, &MatNeumann, &d, &f, &sol);
    /* mu: value of the "generate_random_rhs" option; 0 is later replaced by 1 in both branches. */
    unsigned short mu = HpddmOptionApp(opt, "generate_random_rhs");
    int status = 0;
    if(sizeWorld > 1) {
        HpddmSchwarz* A = HpddmSchwarzCreate(Mat, neighbors, o, sizes, connectivity);
        /* The per-neighbor connectivity arrays are not used again after the creation call. */
        for(int i = 0; i < neighbors; ++i)
            free(connectivity[i]);
        HpddmSchwarzMultiplicityScaling(A, d);
        HpddmSchwarzInitialize(A, d);
        if(mu != 0)
            HpddmSchwarzScaledExchange(A, f, mu);
        else
            mu = 1;
        if(HpddmOptionSet(opt, "schwarz_coarse_correction")) {
            double* addr = HpddmOptionAddr(opt, "geneo_nu");
            unsigned short nu = *addr;
            if(nu > 0) {
                /* With "nonuniform" set, the stored geneo_nu value is shifted by a rank-dependent amount, */
                /* bounded below by the first MAX argument, before the eigenproblem is solved. */
                if(HpddmOptionApp(opt, "nonuniform"))
                    *addr += MAX((int)(-*addr + 1), pow(-1, rankWorld) * rankWorld);
                HpddmSchwarzSolveGEVP(A, MatNeumann);
                nu = HpddmOptionVal(opt, "geneo_nu");
            }
            else {
                /* geneo_nu == 0: use a single deflation vector with every entry equal to 1. */
                /* It is released further down through HpddmDestroyVectors. */
                nu = 1;
                K** deflation = malloc(sizeof(K*));
                *deflation = malloc(sizeof(K) * ndof);
                for(int i = 0; i < ndof; ++i)
                    deflation[0][i] = 1.0;
                HpddmSetVectors(HpddmSchwarzPreconditioner(A), deflation);
            }
            HpddmInitializeCoarseOperator(HpddmSchwarzPreconditioner(A), nu);
            HpddmSchwarzBuildCoarseOperator(A, MPI_COMM_WORLD);
            /*# FactorizationEnd #*/
        }
        HpddmSchwarzCallNumfact(A);
        /* Only rank 0 keeps the "verbosity" option. */
        if(rankWorld != 0)
            HpddmOptionRemove(opt, "verbosity");
        const MPI_Comm* comm = HpddmGetCommunicator(HpddmSchwarzPreconditioner(A));
        /*# Solution #*/
        int it = HpddmSolve(A, f, sol, mu, comm);
        /*# SolutionEnd #*/
        /* storage holds two values per right-hand side: [2 * nu] and [1 + 2 * nu]. */
        underlying_type* storage = malloc(sizeof(underlying_type) * 2 * mu);
        HpddmSchwarzComputeResidual(A, sol, f, storage, mu);
        if(rankWorld == 0)
            for(unsigned short nu = 0; nu < mu; ++nu) {
                if(nu == 0)
                    printf(" --- residual = ");
                else
                    printf("                ");
                printf("%e / %e", storage[1 + 2 * nu], storage[2 * nu]);
                if(mu > 1)
                    printf(" (rhs #%d)", nu + 1);
                printf("\n");
            }
        /* Fail when the iteration count exceeds 60 (krylov_method option equal to 6) or 45 (otherwise), */
        /* or when the ratio of the two stored values exceeds 1.0e-2 for any right-hand side. */
        if(it > ((int)HpddmOptionVal(opt, "krylov_method") == 6 ? 60 : 45))
            status = 1;
        else {
            for(unsigned short nu = 0; nu < mu; ++nu)
                 if(storage[1 + 2 * nu] / storage[2 * nu] > 1.0e-2)
                     status = 1;
        }
        free(storage);
        if(HpddmOptionVal(opt, "geneo_nu") == 0)
            HpddmDestroyVectors(HpddmSchwarzPreconditioner(A));
        HpddmSchwarzDestroy(A);
    }
    else {
        /* Single process: factorize Mat and solve directly. */
        HpddmSubdomain* S = NULL;
        HpddmSubdomainNumfact(&S, Mat);
        mu = MAX(1, mu);
        HpddmSubdomainSolve(S, f, sol, mu);
        int one = 1;
        /* One allocation of 2 * mu entries: the first mu hold nrm2 of each column of f, */
        /* the last mu (aliased as nrmAx below) hold nrm2 of each column of tmp. */
        underlying_type* nrmb = malloc(sizeof(underlying_type) * 2 * mu);
        for(unsigned short nu = 0; nu < mu; ++nu)
            nrmb[nu] = nrm2(&ndof, f + nu * ndof, &one);
        K* tmp = malloc(sizeof(K) * mu * ndof);
        /* NOTE(review): presumably tmp = Mat * sol followed by a BLAS-style tmp += minus * f, i.e. the residual -- confirm. */
        HpddmCSRMM(Mat, sol, tmp, mu);
        K minus = -1;
        /* ndof is temporarily scaled by mu so that one axpy call covers all columns, then restored. */
        ndof *= mu;
        axpy(&ndof, &minus, f, &one, tmp, &one);
        ndof /= mu;
        underlying_type* nrmAx = nrmb + mu;
        for(unsigned short nu = 0; nu < mu; ++nu) {
            nrmAx[nu] = nrm2(&ndof, tmp + nu * ndof, &one);
            if(nu == 0)
                printf(" --- residual = ");
            else
                printf("                ");
            printf("%e / %e", nrmAx[nu], nrmb[nu]);
            if(mu > 1)
                printf(" (rhs #%d)", nu + 1);
            printf("\n");
            /* Tolerance is 1.0e-6 when underlying_type has the size of double, 1.0e-2 otherwise. */
            if(nrmAx[nu] / nrmb[nu] > (sizeof(underlying_type) == sizeof(double) ? 1.0e-6 : 1.0e-2))
                status = 1;
        }
        free(tmp);
        free(nrmb);
        HpddmSubdomainDestroy(S);
        HpddmMatrixCSRDestroy(Mat);
    }
    free(d);

    if(HpddmOptionSet(opt, "schwarz_coarse_correction") && HpddmOptionVal(opt, "geneo_nu") > 0)
        HpddmMatrixCSRDestroy(MatNeumann);
    free(sol);
    free(f);
    MPI_Finalize();
    return status;
}
//! Set up the lattice minimizer:
//!  - verify that the per-direction move scales agree with the symmetry matrices,
//!  - determine which lattice directions are fixed (zero move scale or truncated),
//!  - build strainBasis from symmetrized, mutually orthogonalized, normalized strains,
//!  - store the basis size in e.latticeMinParams.nDim, print the basis, and set h.
//! Calls die() if the move scales conflict with a symmetry or if no strain survives.
LatticeMinimizer::LatticeMinimizer(Everything& e) : e(e), Rorig(e.gInfo.R)
{
	logPrintf("\n--------- Lattice Minimization ---------\n");
	
	//Directions coupled by a non-zero symmetry matrix entry must share one move scale:
	std::vector<matrix3<int>> sym = e.symm.getMatrices();
	for(const matrix3<int>& m: sym)
		for(int i=0; i<3; i++)
			for(int j=0; j<3; j++)
			{	if(!m(i,j)) continue; //i and j not connected by this matrix
				if(!(e.cntrl.lattMoveScale[i] != e.cntrl.lattMoveScale[j])) continue; //scales agree
				die("latt-move-scale is not commensurate with symmetries:\n"
					"\t(Lattice vectors #%d and #%d are connected by symmetry,\n"
					"\tbut have different move scale factors %lg != %lg).\n",
					i, j, e.cntrl.lattMoveScale[i], e.cntrl.lattMoveScale[j]);
			}
	
	//A direction is fixed if its move scale is zero or if it is truncated:
	vector3<bool> isFixed, isTruncated = e.coulombParams.isTruncated();
	for(int dir=0; dir<3; dir++)
		isFixed[dir] = (e.cntrl.lattMoveScale[dir]==0.) || isTruncated[dir];
	
	//Candidates 0-2 are diagonal strains, 3-5 are symmetric off-diagonal strains:
	for(int k=0; k<6; k++)
	{	matrix3<int> s; //starts out all zero
		if(k<3)
		{	if(isFixed[k]) continue; //would alter a fixed direction
			s(k,k) = 1;
		}
		else
		{	const int i=(k+1)%3;
			const int j=(k+2)%3;
			if(isFixed[i] || isFixed[j]) continue; //would alter a fixed direction
			s(i,j) = 1;
			s(j,i) = 1;
		}
		//Accumulate the candidate transformed by every symmetry matrix:
		matrix3<int> sSym;
		for(const matrix3<int>& m: sym)
			sSym += (det(m) * adjugate(m)) * s * m; //det*adjugate is the inverse since |det(m)| = 1
		//Remove components along the basis elements accepted so far:
		matrix3<> strain(sSym); //integer to double matrix
		for(const matrix3<>& sPrev: strainBasis)
			strain -= sPrev * dot(sPrev, strain);
		const double strainNorm = nrm2(strain);
		if(strainNorm < symmThresholdSq) continue; //linearly dependent on previous elements
		strainBasis.push_back((1./strainNorm) * strain);
	}
	if(strainBasis.size() == 0)
		die("All lattice-vectors are constrained by coulomb truncation and/or\n"
			"latt-move-scale: please disable lattice minimization.\n");
	
	//Report the resulting basis:
	e.latticeMinParams.nDim = strainBasis.size();
	logPrintf("Minimization of dimension %lu over strains spanned by:\n", strainBasis.size());
	for(const matrix3<>& s: strainBasis)
	{	s.print(globalLog, " %lg ");
		logPrintf("\n");
	}

	h = 1e-5;
}
Example #17
0
/*
 * Follows a regularization path over lambda, merging clusters as it goes.
 *
 * Every row of x starts as its own cluster. For the current lambda the
 * inner loop takes gradient steps on alpha (fixed decreasing step size, or
 * a line search, depending on linesearch_freq) until the summed gradient
 * norm drops below opt_thresh; after each step, clusters that
 * check_clusters_thresh reports for join_thresh are merged. The outer loop
 * then multiplies lambda by lambda_factor and repeats until a single
 * cluster remains or target_cluster clusters are reached.
 *
 * With target_cluster==0 the solution for every lambda is stored in the
 * returned Results; otherwise only the solution with exactly target_cluster
 * clusters is stored. If the cluster count falls below target_cluster,
 * lambda is set to the midpoint of lambda and the previous lambda and the
 * clusters are reset to singletons.
 *
 * If the inner loop exceeds maxit iterations, alpha is reset to x once; a
 * second overrun makes the function return what has been stored so far.
 *
 * Returns a Results object allocated with new; this function never deletes
 * it.
 *
 * The four working buffers are owned by std::vector so that every return
 * path releases them. They used to be allocated with new[] and released
 * with scalar delete, which is undefined behaviour. The vectors are
 * zero-initialized, whereas new double[] left the memory indeterminate.
 */
Results* join_clusters2_restart
(double *x,//array/matrix of data
 SymNoDiag *W,//lower triangle of weight matrix
 unsigned int Px,//problem size
 double lambda,//starting point in regularization path
 double join_thresh, //tolerance for equality of points
 double opt_thresh, //tolerance for optimality
 double lambda_factor,//increase of lambda after optimality
 double smooth,//smoothing parameter
 int maxit,
 int linesearch_freq,//how often to do a linesearch? if 0, never. if
		     //n>0, do n-1 linesearch steps for every
		     //decreasing step size step. set this to 2 if
		     //unsure.
 int linesearch_points,//how many points to check along the gradient
		       //direction. set to 10 if unsure.
 int check_splits,
 int target_cluster,
 int verbose
 ){
  unsigned int N = W->N;
  //W->print();
  double old_lambda=0;
  std::vector<int> rows,rowsj;
  std::vector<int>::iterator ri,rj;
  std::list< std::vector<int> > clusters;
  std::list< std::vector<int> >::iterator it,cj;
  unsigned int i,k,j;
  int tried_restart;
  for(i=0;i<N;i++){
    rows.assign(1,i);
    clusters.push_back(rows);
  }
  //the vectors own the storage; the raw pointers are non-owning views
  //that stay valid because the vectors are never resized.
  std::vector<double> old_alpha_buf(N*Px);
  std::vector<double> alpha_buf(N*Px);
  std::vector<double> xbar_buf(N*Px);
  std::vector<double> dir_buf(N*Px);
  double *old_alpha = old_alpha_buf.data();
  double *alpha = alpha_buf.data();
  double *xbar = xbar_buf.data();
  double *dir = dir_buf.data();
  for(i=0;i<N*Px;i++){
    alpha[i]=xbar[i]=x[i];
  }
  Matrix amat(alpha,N,Px),xmat(x,N,Px);
  SymNoDiag diffs(N);
  diffs.calc_diffs(clusters,amat,nrm2);
  //store initial trivial solution
  Results *results = new Results(N,Px,opt_thresh);
  if(target_cluster==0)results->add(alpha,0,0);
  double weight,diff,step;
  while(clusters.size()>1){
    double grad=opt_thresh;
    int iteration=1;
    tried_restart=0;
    //if we use the general (slower) algorithm for any weights, then
    //split the clusters to individual points
    if(check_splits){
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
        rows.assign(1,i);
        clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
        xbar[i]=x[i];
      }
    }
    while(grad>=opt_thresh){
      //first calc gradients
      grad = 0;
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        //each cluster is represented by the row of its first member
        i = rows[0];
        for(k=0;k<Px;k++){
          dir[i+k*N] = xbar[i+k*N] - alpha[i+k*N];
        }
        for(cj=clusters.begin();cj!=clusters.end();cj++){
          if(it!=cj){
            rowsj = *cj;
            j=rowsj[0];
            weight=0;
            diff = *diffs(i,j);
            if(diff!=0){
              if(smooth!=0){
                diff *= diff; //now squared l2 norm
                diff += smooth; //add smoothing parameter under sqrt
                diff = sqrt(diff);//put sqrt back
              }
              //total weight between all members of the two clusters
              for(ri=rows.begin();ri!=rows.end();ri++){
                for(rj=rowsj.begin();rj!=rowsj.end();rj++){
                  weight += W->getval(*ri,*rj);
                }
              }
              //weight *= lambda / diff / ((double)(N-1)) / ((double)rows.size());
              weight *= lambda / diff / ((double)rows.size());
              for(k=0;k<Px;k++){
                dir[i+k*N] += weight * (alpha[j+k*N]-alpha[i+k*N]);
              }
            }
          }
        }
        grad += nrm2(Array(dir+i,N,Px));
      }
      //store this iteration
      //results->add(alpha,lambda,grad);
      //then take a step
      if(linesearch_freq==0 || (iteration % linesearch_freq)==0 ){
        //Decreasing step size
        //TDH and pierre 18 jan 2011 try sqrt dec step size
        step=1/((double)iteration);
        //step=1/sqrt((double)iteration);
        if(verbose>=2)printf("grad %f step %f it %d\n",grad,step,iteration);
        take_step(clusters,alpha,dir,N,Px,step);
      }else{
        double cost_here,cost_step;
        //cost -> step size; the two smallest costs come first
        std::map<double,double> cost_steps;
        std::map<double,double>::iterator step1,step2;
        for(i=0;i<N*Px;i++)old_alpha[i]=alpha[i];//copy alpha
        //compare current cost to cost after stepping in gradient direction
        cost_here=cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
        step = 0;
        cost_steps.insert(std::pair<double,double>(cost_here,0));
        //keep taking unit steps until the cost rises above the start
        while(cost_step<=cost_here){
          take_step(clusters,alpha,dir,N,Px,1);
          step += 1;
          diffs.calc_diffs(clusters,amat,nrm2);
          cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
          if(verbose>=2)
            printf("cost %.10f step %f cost_here %f\n",cost_step,step,cost_here);
          cost_steps.insert(std::pair<double,double>(cost_step,step));
        }
        //repeatedly try the midpoint of the two lowest-cost steps so far
        for(int cuts=0;cuts<linesearch_points;cuts++){
          step1=step2=cost_steps.begin();
          step2++;
          step = (step1->second + step2->second)/2;
          for(i=0;i<N*Px;i++){
            alpha[i]=old_alpha[i];
          }
          take_step(clusters,alpha,dir,N,Px,step);
          diffs.calc_diffs(clusters,amat,nrm2);
          cost_step=calc_cost(clusters,amat,xmat,W,diffs,lambda);
          if(verbose>=2)printf("cost %.10f step %f %d\n",cost_step,step,cuts);
          cost_steps.insert(std::pair<double,double>(cost_step,step));
        }
        cost_steps.clear();
      }
      if(iteration++ > maxit){
        if(tried_restart){
          printf("max iteration %d exit\n",maxit);
          return results;
        }else{
          if(verbose>=1)printf("max iterations, trying restart from x\n");
          tried_restart=1;
          iteration=1;
          for(i=0;i<N*Px;i++)alpha[i]=x[i];
        }
      }
      //calculate differences
      diffs.calc_diffs(clusters,amat,nrm2);
      //check for joins
      JoinPair tojoin;
      while(dojoin(tojoin=check_clusters_thresh(&clusters,diffs,join_thresh))){
        //if(verbose>=1)
        //  printf("join: %d %d\n",tojoin.first->front(),tojoin.second->front());
        int ni=tojoin.first->size();
        int nj=tojoin.second->size();
        i=tojoin.first->front();
        j=tojoin.second->front();
        tojoin.first->insert(tojoin.first->end(),
                             tojoin.second->begin(),
                             tojoin.second->end());
        //merged representative is the size-weighted mean of both clusters
        for(k=0;k<Px;k++){
          alpha[i+k*N] = (alpha[i+k*N]*ni + alpha[j+k*N]*nj)/(ni+nj);
          xbar[i+k*N] = (xbar[i+k*N]*ni + xbar[j+k*N]*nj)/(ni+nj);
        }
        clusters.erase(tojoin.second);
        iteration=1;
        if(clusters.size()>1){
          diffs.calc_diffs(clusters,amat,nrm2);//inefficient
        }else{
          grad=0;//so we can escape from the last optimization loop
        }
      }
    }//while(grad>=opt_thresh)
    if(verbose>=1)
      printf("solution iteration %d lambda %f nclusters %d\n",
             iteration,lambda,(int)clusters.size());

    if(target_cluster == 0){
      //for each cluster, there may be several points. we store the
      //alpha value just in the row of the first point. thus here we
      //copy this value to the other rows before copying the optimal
      //alpha to results.
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        if(rows.size()>1){
          for(i=1;i<rows.size();i++){
            for(k=0;k<Px;k++){
              alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
            }
          }
        }
      }
      results->add(alpha,lambda,grad);
    }
    //haven't yet reached the target number of clusters, multiply
    //lambda by lambda_factor and continue along the path
    if((int)clusters.size()>target_cluster){
      old_lambda=lambda;
      lambda *= lambda_factor;
    }
    //if we have passed the target cluster number then decrease
    //lambda and go look for it!
    if((int)clusters.size()<target_cluster){
      if(verbose>=1){
        printf("missed target %d, going back for it\n",target_cluster);
      }
      lambda = (lambda+old_lambda)/2;
      clusters.clear();
      //reassign original clusters
      for(i=0;i<N;i++){
        rows.assign(1,i);
        clusters.push_back(rows);
      }
      //recopy original xbar
      for(i=0;i<N*Px;i++){
        xbar[i]=x[i];
      }
    }
    //this is the number of clusters that we were looking for,
    //save and quit!
    if((int)clusters.size()==target_cluster){
      for(it=clusters.begin();it!=clusters.end();it++){
        rows = *it;
        if(rows.size()>1){
          for(i=1;i<rows.size();i++){
            for(k=0;k<Px;k++){
              alpha[rows[i]+k*N] = alpha[rows[0]+k*N];
            }
          }
        }
      }
      results->add(alpha,lambda,grad);
      if(verbose>=1)printf("got target cluster %d exit\n",target_cluster);
      return results;
    }
  }
  return results;
}
Example #18
0
//=============================================================================
// Iterative solver for A*x = y.
//
// On entry x holds the right-hand side: it is copied into y before x is
// zeroed, and x then accumulates the solution in place.
//
// The loop runs while either |f| or fabs(damax(y-A*x)) exceeds eps, for at
// most 2*x.l iterations. Each iteration prints the iteration number and the
// current fabs(damax(y-A*x)) to std::cout; the final count and residual go to
// std::cerr.
// NOTE(review): damax presumably returns the largest-magnitude component --
// confirm against the CPPL documentation.
//
// Return value: 0 (false) when the loop stopped before reaching the iteration
// cap, 1 (true) when the cap was hit. So despite the bool return type,
// `true` signals failure to converge.
bool minres
(
 const CPPL::dsymatrix& A,
 CPPL::dcovector& x,
 const double& eps
)
{
  // y: right-hand side (the incoming x); r: initial residual, equal to y
  // because the starting guess is zero.
  const CPPL::dcovector y(x);
  CPPL::dcovector r(y);

  // Naming convention: suffixes 0/1/2 (or 1/2/3 for p and beta) denote the
  // oldest / previous / current value of each recurrence quantity.
  double beta2(nrm2(r)), beta3;
  double rho0(1.0), rho1(beta2), rho2;
  double rhop(0.0);
  double c0(0.0), c1(-1.0), c2;
  double s0(0.0), s1(0.0), s2;
  // f is the scalar whose magnitude is tested against eps; it starts at the
  // norm of the initial residual.
  double f(beta2);

  // p2 is the normalized initial residual; no guard is applied for beta2 == 0.
  CPPL::dcovector p1(x.l), p2(r/beta2), p3;
  CPPL::dcovector q0(x.l), q1(x.l), q2;
  x.zero();
  p1.zero();
  q0.zero();
  q1.zero();

  int itc(0);
  const int itmax(2*x.l);
  // The residual y-A*x is recomputed in the loop condition and again for the
  // progress print below.
  while( (fabs(f)>eps || fabs(damax(y-A*x))>eps) && itc<itmax){
    std::cout << itc << " " << fabs(damax(y-A*x)) << std::endl;
    //std::cerr << "itc=" << itc << ", fabs(f)=" << fabs(f) << std::endl;
    // Three-term recurrence: p3 is A*p2 with its components along p1 and p2
    // removed, then normalized by beta3.
    CPPL::dcovector Ap2(A*p2), z;
    z =Ap2-beta2*p1;
    double alpha;
    alpha =Ap2%p2;
    p3 =z-alpha*p2;
    beta3 =nrm2(p3);
    p3 /=beta3;

    // d and h use the rotation values from before this iteration's update.
    double d, h;
    d =(alpha-rhop*c0)*s1;
    h =beta2*s0;

    // New rotation (c2, s2) with rho2 = sqrt(rhop^2 + beta3^2).
    rhop =-beta2*c0*s1 -alpha*c1;
    rho2 =sqrt(pow(rhop,2)+pow(beta3,2));
    c2 =rhop/rho2;
    s2 =beta3/rho2;

    // Update direction q2, built from p2 and the two previous directions.
    CPPL::dcovector zp;
    zp =p2 -(h/rho0)*q0;
    q2 =zp -(d/rho1)*q1;
    double t;
    t =f*c2;
    f *=s2;

    // Advance the solution, then shift every recurrence quantity by one step.
    x +=(t/rho2)*q2;
    beta2=beta3;
    rho0=rho1; rho1=rho2;
    c0=c1; c1=c2;
    s0=s1; s1=s2;
    // After the two swaps: p1 = old p2, p2 = old p3 (same pattern for q).
    swap(p1,p2); swap(p2,p3);
    swap(q0,q1); swap(q1,q2);

    itc++;
  }
  std::cerr << "itc=" << itc << "  fabs(damax(y-A*x))=" << fabs(damax(y-A*x)) << std::endl;
  //std::cerr << "itc=" << itc << "  fabs(f)=" << fabs(f) << std::endl;

  // 0 (false) = stopped before the iteration cap; 1 (true) = cap reached.
  if(itc<itmax){ return 0; }
  else{ return 1; }
}