/*
 * Trace back the optimal substructure closed by the pair (i, j).
 *
 * Marks i and j as paired with each other in `structure`, determines which
 * candidate (hairpin, stack, internal loop, multiloop) equals V(i, j), adds
 * that candidate's own energy to `total_en`, optionally logs it to
 * `energy_decompose_outfile`, and recurses into the enclosed region.
 * Candidates disallowed by canHairpin()/canStack() are treated as INFINITY_.
 * Candidates are tested in the order hairpin, stack, internal loop, multiloop;
 * the first one that matches wins.
 *
 * Returns INFINITY_ when j - i < TURN, V(i, j) when a candidate matched,
 * and 0 when none did.
 */
int traceV(int i, int j) {
	if (j - i < TURN) {
		return INFINITY_;
	}

	const int hairpinE  = canHairpin(i, j) ? eH(i, j) : INFINITY_;
	const int stackE    = canStack(i, j) ? eS(i, j) + V(i + 1, j - 1) : INFINITY_;
	const int internalE = canStack(i, j) ? VBI(i, j) : INFINITY_;
	const int multiE    = canStack(i, j) ? VM(i, j) : INFINITY_;

	const int best = V(i, j);

	/* The pair is recorded before the decomposition is identified. */
	structure[i] = j;
	structure[j] = i;

	if (best == hairpinE) {
		if (print_energy_decompose == 1) {
			fprintf(energy_decompose_outfile, "i %5d j %5d Hairpin   %12.2f\n", i, j, eH(i, j)/100.00);
		}
		total_en += eH(i, j);
		return best;
	}

	if (best == stackE) {
		if (print_energy_decompose == 1) {
			fprintf(energy_decompose_outfile, "i %5d j %5d Stack     %12.2f\n", i, j, eS(i, j)/100.00);
		}
		total_en += eS(i, j);
		traceV(i + 1, j - 1);
		return best;
	}

	if (best == internalE) {
		/* traceVBI is left to finish the line started here. */
		if (print_energy_decompose == 1) {
			fprintf(energy_decompose_outfile, "i %5d j %5d IntLoop  ", i, j);
		}
		traceVBI(i, j);
		return best;
	}

	if (best == multiE) {
		/* The branches are traced first; whatever is left of V(i, j) is the loop's own share. */
		int eVM = traceVM(i, j);
		if (print_energy_decompose == 1) {
			fprintf(energy_decompose_outfile, "i %5d j %5d MultiLoop %12.2f\n", i, j, (best-eVM)/100.0);
		}
		total_en += (best - eVM);
		return best;
	}

	return 0;
}
Exemple #2
0
/*
 * Trace back the optimal substructure closed by the pair (i, j).
 *
 * Recomputes the four candidate energies for a structure closed by (i, j):
 * hairpin eH(i, j), stack eS(i, j) + V(i+1, j-1) (disabled when eS(i, j)
 * is 0), internal loop VBI(i, j) and multiloop VM(i, j). It then follows the
 * candidate that attains the minimum, adds that candidate's own energy to
 * `total_en`, and prints a line when `verbose == 1`.
 *
 * Tie-breaking priority is stack, internal loop, hairpin, multiloop. This
 * matches the previous behaviour in every case the previous code handled.
 * The previous conditions left a hole: when hairpin and multiloop tied for
 * the minimum (and stack/internal loop did not), no branch fired and the
 * function returned 0 without tracing or accounting for the energy. Such a
 * tie now resolves to the hairpin.
 *
 * Returns INFINITY_ when j - i < TURN, otherwise the minimum candidate energy.
 */
int traceV(int i, int j) {
	int a, b, c, d, Vij;
	if (j-i < TURN)  return INFINITY_;

	a = eH(i, j);
	b = eS(i, j) + V(i + 1, j - 1);
	if (eS(i, j) == 0) b = INFINITY_;
	c = VBI(i,j);
	d = VM(i,j);

	Vij = MIN(MIN(a, b), MIN(c, d));

	if (Vij == b) {
		if (verbose == 1)
			printf("i %5d j %5d Stack     %12.2f\n", i, j, eS(i, j)/100.00);
		total_en += eS(i,j);
		structure[i + 1] = j - 1;
		structure[j - 1] = i + 1;
		traceV(i + 1, j - 1);
		return Vij;
	}

	if (Vij == c) {
		/* traceVBI is left to finish the line started here. */
		if (verbose == 1)
			printf("i %5d j %5d IntLoop  ", i, j);
		traceVBI(i, j);
		return Vij;
	}

	if (Vij == a) {
		if (verbose == 1)
			printf("i %5d j %5d Hairpin   %12.2f\n", i, j, eH(i, j)/100.00);
		total_en += eH(i,j);
		return Vij;
	}

	if (Vij == d) {
		/* The branches are traced first; the remainder of Vij is the loop's own share. */
		int eVM = traceVM(i, j);
		if (verbose ==1)
			printf("i %5d j %5d MultiLoop %12.2f\n", i, j, (Vij-eVM)/100.0);
		total_en += (Vij-eVM);
		return Vij;
	}

	/* Not expected: Vij is the minimum of a, b, c and d, so one test above matches. */
	return 0;
}
/**
 * Numerically approximate the gradient (and, when wantHessian is set, the
 * Hessian) of fitMat at the current estimates held by fc.
 *
 * Outline:
 *  - bail out early for squared-residual fit units, when there are no free
 *    parameters, or when no reference fit is available;
 *  - build paramMap (free-parameter index -> index into fc->varGroup->vars,
 *    skipping profiled-out entries);
 *  - size the constraint buffers and, if constraints exist, compute their
 *    Jacobian;
 *  - allocate one hess_struct per worker and run calcHessianEntry through
 *    CovEntrywiseParallel;
 *  - record forward/central/backward gradient estimates and a per-parameter
 *    "symmetric" flag in the `detail` list;
 *  - copy the central gradient into fc, optionally flag
 *    INFORM_NOT_AT_OPTIMUM when the gradient norm exceeds the threshold, then
 *    restore the estimates and recompute the fit.
 *
 * On normal completion fc->wanted gains FF_COMPUTE_GRADIENT (and
 * FF_COMPUTE_HESSIAN when wantHessian is set).
 *
 * @param fc fit context supplying the estimates, receiving the gradient,
 *           Hessian storage and inform code.
 */
void omxComputeNumericDeriv::computeImpl(FitContext *fc)
{
	if (fc->fitUnits == FIT_UNITS_SQUARED_RESIDUAL ||
	    fc->fitUnits == FIT_UNITS_SQUARED_RESIDUAL_CHISQ) {  // refactor TODO
		numParams = 0;
		if (verbose >= 1) mxLog("%s: derivatives %s units are meaningless",
					name, fitUnitsToName(fc->fitUnits));
		return; //Possible TODO: calculate Hessian anyway?
	}

	// Only published to fc->wanted at the very end, after everything succeeded.
	int newWanted = fc->wanted | FF_COMPUTE_GRADIENT;
	if (wantHessian) newWanted |= FF_COMPUTE_HESSIAN;

	// numParams persists between invocations; a different count on a later call is an error.
	int nf = fc->calcNumFree();
	if (numParams != 0 && numParams != nf) {
		mxThrow("%s: number of parameters changed from %d to %d",
			 name, numParams, nf);
	}

	numParams = nf;
	if (numParams <= 0) { complainNoFreeParam(); return; }

	optima.resize(numParams);
	fc->copyEstToOptimizer(optima);
	// paramMap[px] is the varGroup index of the px-th parameter that is not profiled out.
	paramMap.resize(numParams);
	for (int px=0,ex=0; px < numParams; ++ex) {
		if (fc->profiledOut[ex]) continue;
		paramMap[px++] = ex;
	}

	omxAlgebraPreeval(fitMat, fc);
	fc->createChildren(fitMat); // allow FIML rowwiseParallel even when parallel=false

	fc->state->countNonlinearConstraints(fc->state->numEqC, fc->state->numIneqC, false);
	int c_n = fc->state->numEqC + fc->state->numIneqC;
	fc->constraintFunVals.resize(c_n);
	fc->constraintJacobian.resize(c_n, numParams);
	if(c_n){
		omxCalcFinalConstraintJacobian(fc, numParams);
	}
	// TODO: Allow more than one hessian value for calculation

	// One worker unless parallel evaluation is enabled and child contexts exist.
	int numChildren = 1;
	if (parallel && !fc->openmpUser && fc->childList.size()) numChildren = fc->childList.size();

	if (!fc->haveReferenceFit(fitMat)) return;

	minimum = fc->fit;

	// Per-worker scratch space; released after the parallel section below.
	hessWorkVector = new hess_struct[numChildren];
	if (numChildren == 1) {
		omxPopulateHessianWork(hessWorkVector, fc);
	} else {
		for(int i = 0; i < numChildren; i++) {
			omxPopulateHessianWork(hessWorkVector + i, fc->childList[i]);
		}
	}
	if(verbose >= 1) mxLog("Numerical Hessian approximation (%d children, ref fit %.2f)",
			       numChildren, minimum);

	hessian = NULL;
	if (wantHessian) {
		hessian = fc->getDenseHessUninitialized();
		Eigen::Map< Eigen::MatrixXd > eH(hessian, numParams, numParams);
		eH.setConstant(NA_REAL);

		// Seed entries that are already known; khMap entries < 0 mark rows/columns to skip.
		if (knownHessian) {
			int khSize = int(khMap.size());
			Eigen::Map< Eigen::MatrixXd > kh(knownHessian, khSize, khMap.size());
			for (int rx=0; rx < khSize; ++rx) {
				for (int cx=0; cx < khSize; ++cx) {
					if (khMap[rx] < 0 || khMap[cx] < 0) continue;
					eH(khMap[rx], khMap[cx]) = kh(rx, cx);
				}
			}
		}
	}

	if (detail) {
		recordDetail = false; // already done it once
	} else {
		// Build the per-parameter diagnostics list: one logical column and three real columns.
		Rf_protect(detail = Rf_allocVector(VECSXP, 4));
		SET_VECTOR_ELT(detail, 0, Rf_allocVector(LGLSXP, numParams));
		for (int gx=0; gx < 3; ++gx) {
			SET_VECTOR_ELT(detail, 1+gx, Rf_allocVector(REALSXP, numParams));
		}
		SEXP detailCols;
		Rf_protect(detailCols = Rf_allocVector(STRSXP, 4));
		Rf_setAttrib(detail, R_NamesSymbol, detailCols);
		SET_STRING_ELT(detailCols, 0, Rf_mkChar("symmetric"));
		SET_STRING_ELT(detailCols, 1, Rf_mkChar("forward"));
		SET_STRING_ELT(detailCols, 2, Rf_mkChar("central"));
		SET_STRING_ELT(detailCols, 3, Rf_mkChar("backward"));

		SEXP detailRowNames;
		Rf_protect(detailRowNames = Rf_allocVector(STRSXP, numParams));
		Rf_setAttrib(detail, R_RowNamesSymbol, detailRowNames);
		for (int nx=0; nx < int(numParams); ++nx) {
			// Row nx describes free parameter nx, so translate through paramMap
			// (as the symmetry loop below does) to skip profiled-out variables.
			SET_STRING_ELT(detailRowNames, nx, Rf_mkChar(fc->varGroup->vars[ paramMap[nx] ]->name));
		}
		markAsDataFrame(detail);
	}

	gforward = REAL(VECTOR_ELT(detail, 1));
	gcentral = REAL(VECTOR_ELT(detail, 2));
	gbackward = REAL(VECTOR_ELT(detail, 3));
	Eigen::Map< Eigen::ArrayXd > Gf(gforward, numParams);
	Eigen::Map< Eigen::ArrayXd > Gc(gcentral, numParams);
	Eigen::Map< Eigen::ArrayXd > Gb(gbackward, numParams);
	Gf.setConstant(NA_REAL);
	Gc.setConstant(NA_REAL);
	Gb.setConstant(NA_REAL);

	calcHessianEntry che(this);
	CovEntrywiseParallel(numChildren, che);

	// Collect probe counts from every worker before releasing the scratch space.
	for(int i = 0; i < numChildren; i++) {
		struct hess_struct *hw = hessWorkVector + i;
		totalProbeCount += hw->probeCount;
	}
	delete [] hessWorkVector;
	if (isErrorRaised()) return;

	Eigen::Map< Eigen::ArrayXi > Gsymmetric(LOGICAL(VECTOR_ELT(detail, 0)), numParams);
	double gradNorm = 0.0;
	
	double feasibilityTolerance = Global->feasibilityTolerance;
	for (int px=0; px < numParams; ++px) {
		// factor out similar code in ComputeNR
		omxFreeVar &fv = *fc->varGroup->vars[ paramMap[px] ];
		// A parameter sitting on a bound with the gradient pointing outward is
		// excluded from the gradient norm and marked as not symmetric.
		if ((fabs(optima[px] - fv.lbound) < feasibilityTolerance && Gc[px] > 0) ||
		    (fabs(optima[px] - fv.ubound) < feasibilityTolerance && Gc[px] < 0)) {
			Gsymmetric[px] = false;
			continue;
		}
		gradNorm += Gc[px] * Gc[px];
		// Relative asymmetry between the forward and backward estimates.
		double relsym = 2 * fabs(Gf[px] + Gb[px]) / (Gb[px] - Gf[px]);
		Gsymmetric[px] = (Gf[px] < 0 && 0 < Gb[px] && relsym < 1.5);
		if (checkGradient && verbose >= 2 && !Gsymmetric[px]) {
			mxLog("%s: param[%d] %d %f", name, px, Gsymmetric[px], relsym);
		}
	}
	
	fc->grad.resize(fc->numParam);
	fc->grad.setZero();
	fc->copyGradFromOptimizer(Gc);
	
	if(c_n){
		fc->inequality.resize(fc->state->numIneqC);
		fc->analyticIneqJacTmp.resize(fc->state->numIneqC, numParams);
		fc->myineqFun(true, verbose, omxConstraint::LESS_THAN, false);
	}

	gradNorm = sqrt(gradNorm);
	double gradThresh = Global->getGradientThreshold(minimum);
	//The gradient will generally not be near zero at a local minimum if there are equality constraints 
	//or active inequality constraints:
	if ( checkGradient && gradNorm > gradThresh && !(fc->state->numEqC || fc->inequality.array().sum()) ) {
		if (verbose >= 1) {
			mxLog("Some gradient entries are too large, norm %f", gradNorm);
		}
		if (fc->getInform() < INFORM_NOT_AT_OPTIMUM) fc->setInform(INFORM_NOT_AT_OPTIMUM);
	}

	fc->setEstFromOptimizer(optima);
	// auxiliary information like per-row likelihoods need a refresh
	ComputeFit(name, fitMat, FF_COMPUTE_FIT, fc);
	fc->wanted = newWanted;
}
//Shel: Function for scoring a node (recursive)
int ScoreNode(TreeNode* node, int* RNA, nndb_constants* param, int length){
	int result;
	result = 0;
	int *pairedChildren;
	pairedChildren = NULL;
	int numPairedChildren;
	numPairedChildren = 0;
	int i;
	
	for (i = 0 ; i < node->numChildren ; i++){ 
       // find location and number of paired children 
       //and add scores of associated loops
		if ((node->children[i])->isPair) {
			//Manoj: Changed the code to generate warnings in case of pairing in structure which are not valid
			 /*Base lb = (node->children[i])->lowBase.base;
                                char lbChar;
                                if(lb==1)lbChar='A';else if(lb==2)lbChar='C';else if(lb==4)lbChar='G';else if(lb==8)lbChar='U';else lbChar="X";
                                Base hb = (node->children[i])->highBase.base;
                                char hbChar;
                                if(hb==1)hbChar='A';else if(hb==2)hbChar='C';else if(hb==4)hbChar='G';else if(hb==8)hbChar='U';else hbChar="X";
                                printf("CHECKING: bases %d and %d (%c%c) if they can pair!\n",(node->children[i])->lowBase.index, (node->children[i])->highBase.index,lbChar, hbChar);*/
			if(!canPair((node->children[i])->lowBase.base, (node->children[i])->highBase.base)){
				Base lb = (node->children[i])->lowBase.base;
				char lbChar;
				if(lb==1)lbChar='A';else if(lb==2)lbChar='C';else if(lb==4)lbChar='G';else if(lb==8)lbChar='U';else lbChar='X';
	                        Base hb = (node->children[i])->highBase.base;
				char hbChar;
                                if(hb==1)hbChar='A';else if(hb==2)hbChar='C';else if(hb==4)hbChar='G';else if(hb==8)hbChar='U';else hbChar='X';
				printf("WARNING: bases %d and %d (%c%c) can't pair; structure cannot be scored. Exiting.\n",(node->children[i])->lowBase.index, (node->children[i])->highBase.index,lbChar, hbChar);
				exit(-1);
				//continue;
			}
			//printf("Yes they can pair\n");
			result += ScoreNode(node->children[i], RNA, param, length);
			numPairedChildren += 1;
			pairedChildren = realloc(pairedChildren, sizeof(int) * numPairedChildren);
			pairedChildren[numPairedChildren - 1] = i;
		}
	}
	
	if (node->lowBase.index != 0) 
	{
		
		if (numPairedChildren == 0)  // must be a hairpin
		{
			
			 int energy = eH(node->lowBase.index,node->highBase.index, RNA, param);
          result += energy;
			if(printOn2)printf("%d \t %d: Hairpin Loop with energy %.2f\n",  node->lowBase.index, node->highBase.index, (double)energy/100);
		}
		else if (numPairedChildren == 1)  // must be stack, bulge, or internal
		{
			if (node->numChildren == 1)  // must be stack 
			{
          	int energy = eS(node->lowBase.index, node->highBase.index, RNA, param);
	       	result += energy; 
	     		if(printOn2)printf("%d \t %d: Stacked pair with energy %.2f\n",  node->lowBase.index, node->highBase.index, (double)energy/100);
			}
			else 
			{  // must be bulge or internal 
			   
				int energy = eL(node->lowBase.index, node->highBase.index, 
				              node->children[pairedChildren[0]]->lowBase.index,
				              node->children[pairedChildren[0]]->highBase.index,
								  RNA, param);
            result += energy;
				if(printOn2)printf("%d \t %d: Bulge or Inernal Loop with energy %.2f\n",  node->lowBase.index, node->highBase.index, (double)energy/100);
			}
		}
		else  // must be a multi-loop
		{	
			int energy = eM(node, pairedChildren, numPairedChildren, RNA, param);
			result += energy;
			if(printOn2)printf("%d \t %d: Multi-loop with energy %.2f\n",  node->lowBase.index, node->highBase.index, (double)energy/100);
		}
	}
	else { // must be external
      int energy = eE(node, pairedChildren, numPairedChildren, RNA, param, length);
		result += energy; 
		if(printOn2)printf("%d \t %d: External loop with energy %.2f\n",  node->lowBase.index, node->highBase.index, (double)energy/100);
	}

	return result;
}
Exemple #5
0
/*
 * Fill the folding tables for a sequence of length `len` and return W[len].
 *
 * Phase 1 walks spans b = j - i from TURN+1 up to len-1. For every (i, j) of
 * that span it writes V(i,j) (plus VBI(i,j) and VM(i,j) when the span is long
 * enough and the bases can pair) and WMU(i,j)/WML(i,j). Cells of one span are
 * processed by an OpenMP parallel-for when OpenMP is enabled.
 * Phase 2 fills W[j] for j = TURN+2 .. len from V and earlier W entries.
 *
 * len      - sequence length; RNA[] and the tables are indexed from 1.
 * nThreads - if > 0 and OpenMP is enabled, the thread count to request.
 */
int calculate(int len, int nThreads) { 
	int b, i, j;
#ifdef _OPENMP
	if (nThreads>0) omp_set_num_threads(nThreads);
#endif
	/* Report the team size once, from the master thread of a parallel region. */
#ifdef _OPENMP
#pragma omp parallel
#pragma omp master
	fprintf(stdout,"Thread count: %3d \n",omp_get_num_threads());
#endif


	/* Phase 1: b is the span j - i; shorter spans are completed before longer ones. */
	for (b = TURN+1; b <= len-1; b++) {
#ifdef _OPENMP
#pragma omp parallel for private (i,j) schedule(guided)
#endif
		for (i = 1; i <= len - b; i++) {
			j = i + b;
			/* NOTE(review): flag is set below but never read in this function. */
			int flag = 0, newWM = INFINITY_; 
			if (canPair(RNA[i], RNA[j])) {
				flag = 1;
				int eh = canHairpin(i,j)?eH(i,j):INFINITY_; //hair pin
				int es = canStack(i,j)?eS(i,j)+getShapeEnergy(i)+getShapeEnergy(j)+V(i+1,j-1):INFINITY_; // stack
				if (j-i > 6) { // Internal Loop BEGIN
					int p=0, q=0;
					int VBIij = INFINITY_;
					/* Enumerate inner pairs (p,q); the bounds on p and minq limit the search by MAXLOOP. */
					for (p = i+1; p <= MIN(j-2-TURN,i+MAXLOOP+1) ; p++) {
						int minq = j-i+p-MAXLOOP-2;
						if (minq < p+1+TURN) minq = p+1+TURN;
						/* p == i+1 with q == j-1 would be a stack, so q stops at j-2 in that case. */
						int maxq = (p==i+1)?(j-2):(j-1);
						for (q = minq; q <= maxq; q++) {
							if (!canPair(RNA[p], RNA[q])) continue;
							if (!canILoop(i,j,p,q)) continue;
							VBIij = MIN(eL(i, j, p, q) + V(p,q), VBIij);
						}
					}
					VBI(i,j) = VBIij;
					/* NOTE(review): V(i,j) is assigned again after the multiloop section below, */
					/* so this update appears to be overwritten - confirm the intent. */
					V(i,j) = V(i,j) + getShapeEnergy(i) + getShapeEnergy(j);

				} // Internal Loop END
				if (j-i > 10) { // Multi Loop BEGIN
					int h;
					/* Four variants: no dangle, dangle at i+1 (d), dangle at j-1 (d), both. */
					int VMij, VMijd, VMidj, VMidjd;
					VMij = VMijd = VMidj = VMidjd = INFINITY_;
					for (h = i+TURN+1; h <= j-1-TURN; h++) { 
						VMij = MIN(VMij, WMU(i+1,h-1) + WML(h,j-1)); 
						VMidj = MIN(VMidj, WMU(i+2,h-1) + WML(h,j-1)); 
						VMijd = MIN(VMijd, WMU(i+1,h-1) + WML(h,j-2)); 
						VMidjd = MIN(VMidjd, WMU(i+2,h-1) + WML(h,j-2)); 
					}
					/* Dangle terms are only allowed when the dangling base may stay single-stranded. */
					int d3 = canSS(j-1)?Ed3(i,j,j-1):INFINITY_;
					int d5 = canSS(i+1)?Ed5(i,j,i+1):INFINITY_;
					VMij = MIN(VMij, (VMidj + d5 +Ec)) ;
					VMij = MIN(VMij, (VMijd + d3 +Ec));
					VMij = MIN(VMij, (VMidjd + d5 + d3+ 2*Ec));
					VMij = VMij + Ea + Eb + auPenalty(i,j);
					VM(i,j) = canStack(i,j)?VMij:INFINITY_;
				} // Multi Loop END
				/* V(i,j) is the best of hairpin, stack, internal loop and multiloop. */
				V(i,j) = MIN4(eh,es,VBI(i,j),VM(i,j));
			}
			else V(i,j) = INFINITY_;
			if (j-i > 4) { // WM BEGIN
				int h; 
				for (h = i+TURN+1 ; h <= j-TURN-1; h++) {
					//ZS: This sum corresponds to when i,j are NOT paired with each other.
					//So we need to make sure only terms where i,j aren't pairing are considered. 
					newWM = (!forcePair(i,j))?MIN(newWM, WMU(i,h-1) + WML(h,j)):newWM;
				}
				newWM = MIN(V(i,j) + auPenalty(i,j) + Eb, newWM); 
				newWM = canSS(i)?MIN(V(i+1,j) + Ed3(j,i+1,i) + auPenalty(i+1,j) + Eb + Ec, newWM):newWM; //i dangle
				newWM = canSS(j)?MIN(V(i,j-1) + Ed5(j-1,i,j) + auPenalty(i,j-1) + Eb + Ec, newWM):newWM;  //j dangle
				newWM = (canSS(i)&&canSS(j))?MIN(V(i+1,j-1) + Ed3(j-1,i+1,i) + Ed5(j-1,i+1,j) + auPenalty(i+1,j-1) + Eb + 2*Ec, newWM):newWM; //i,j dangle
				newWM = canSS(i)?MIN(WMU(i+1,j) + Ec, newWM):newWM; //i dangle
				newWM = canSS(j)?MIN(WML(i,j-1) + Ec, newWM):newWM; //j dangle
				/* Both WM tables receive the same value. */
				WMU(i,j) = WML(i,j) = newWM;
			} // WM END
		}
	}
	/* Phase 2: W[j] from V and W entries of smaller index. */
	for (j = TURN+2; j <= len; j++) {
		/* This i shadows the function-level i declared above. */
		int i, Wj, Widjd, Wijd, Widj, Wij, Wim1;
		Wj = INFINITY_;
		for (i = 1; i < j-TURN; i++) {
			Wij = Widjd = Wijd = Widj = INFINITY_;
			/* Contribution of the prefix before i, capped at 0. */
			Wim1 = MIN(0, W[i-1]); 
			Wij = V(i, j) + auPenalty(i, j) + Wim1;
			Widjd = (canSS(i)&&canSS(j))?V(i+1,j-1) + auPenalty(i+1,j-1) + Ed3(j-1,i + 1,i) + Ed5(j-1,i+1,j) + Wim1:Widjd;
			Wijd = canSS(j)?V(i,j-1) + auPenalty(i,j-1) + Ed5(j-1,i,j) + Wim1:Wijd;
			Widj = canSS(i)?V(i+1, j) + auPenalty(i+1,j) + Ed3(j,i + 1,i) + Wim1:Widj;
			Wj = MIN(MIN4(Wij, Widjd, Wijd, Widj), Wj); 
		}
		/* W[j-1] is only a candidate when j may remain single-stranded. */
		W[j] = canSS(j)?MIN(Wj, W[j-1]):Wj;
	}
	return W[len];
}
Exemple #6
0
/**
 * RRG driver.
 *
 * usage: rrg N n s D seed (id_string)
 *   N    system size (should be n times a power of 2)
 *   n    initial blocking size
 *   s,D  formal s and D parameters
 *   seed seed for the random couplings
 *   id   optional prefix for the output files; defaults to rrg-L<N>-s<s>-D<D>
 *
 * Draws random couplings, builds the Hamiltonian and its block restrictions,
 * runs the initialization / expand / merge hierarchy, refines the two lowest
 * candidate states with DMRG, and appends ground-state data and SxSx/SySy/SzSz
 * correlation matrices to <id>-gs.dat, <id>-sx.dat, <id>-sy.dat, <id>-sz.dat.
 * The couplings are written to stdout; progress and timings go to stderr.
 *
 * Returns 0 on success, 1 on bad arguments or when an output file cannot be
 * opened.
 */
int main(int argc, char *argv[]) {
    if(argc != 6 && argc != 7) {
        printf("usage: rrg N n s D seed (id_string)\n");
        return 1;
        }
    time_t t1,t2,tI,tF;
    ITensor U,Dg,P,S;
    Index ei;

    // RRG structure parameters
    const int    N  = atoi(argv[1]); // should be n*(power of 2)
    const int    n  = atoi(argv[2]); // initial blocking size
    int          w  = n;             // block size (scales with m)
    int          ll = 0;             // lambda block index
    int          m  = 0;             // RG scale factor

    // n <= 0 would make the level loop below spin forever and N/n divide by
    // zero; N < n would leave the list of Hilbert spaces empty.
    if(n <= 0 || N < n) {
        fprintf(stderr,"error: need n > 0 and N >= n (got N=%d, n=%d)\n",N,n);
        return 1;
        }

    // AGSP and subspace parameters
    const double t = 0.3;            // Trotter temperature
    const int    M = 100;            // num Trotter steps
    const int    k = 1;              // power of Trotter op (just use 1)
    const int    s = atoi(argv[3]);  // formal s param
    const int    D = atoi(argv[4]);  // formal D param
    
    // computational settings
    const bool   doI = true; // diag restricted Hamiltonian iteratively?

    // setup random sampling
    const int seed = atoi(argv[5]);
    fprintf(stderr,"seed is %d\n",seed);
    std::mt19937 gen(seed);
    std::uniform_real_distribution<double> udist(0.0,1.0);

    // Output file names are <id>-sx.dat etc. Bounded formatting is used so an
    // overlong id_string is truncated instead of overrunning the buffers.
    FILE *sxfl,*syfl,*szfl,*gsfl;
    char id[128],sxnm[256],synm[256],sznm[256],gsnm[256];
    if(argc == 6) snprintf(id,sizeof(id),"rrg-L%d-s%d-D%d",N,s,D);
    else snprintf(id,sizeof(id),"%s",argv[6]);
    snprintf(sxnm,sizeof(sxnm),"%s-sx.dat",id);
    snprintf(synm,sizeof(synm),"%s-sy.dat",id);
    snprintf(sznm,sizeof(sznm),"%s-sz.dat",id);
    snprintf(gsnm,sizeof(gsnm),"%s-gs.dat",id);
    sxfl = fopen(sxnm,"a");
    syfl = fopen(synm,"a");
    szfl = fopen(sznm,"a");
    gsfl = fopen(gsnm,"a");
    // Fail before the expensive computation rather than when writing results.
    if(!sxfl || !syfl || !szfl || !gsfl) {
        fprintf(stderr,"error: could not open output files for id '%s'\n",id);
        if(sxfl) fclose(sxfl);
        if(syfl) fclose(syfl);
        if(szfl) fclose(szfl);
        if(gsfl) fclose(gsfl);
        return 1;
        }

    // initialize Hilbert subspaces for each level m = 0,...,log(N/n)
    vector<SpinHalf> hsps;
    for(int x = n ; x <= N ; x *= 2) hsps.push_back(SpinHalf(x));
    SpinHalf hs = hsps.back();
 
    // generate product basis over m=0 Hilbert space
    // (bit j-1 of i selects whether site j is "Up")
    const int p = 1 << n;
    vector<MPS> V1;
    for(int i = 0 ; i < p ; ++i) {
        InitState istate(hsps[0],"Dn");
        for(int j = 1 ; j <= n ; ++j)
            if((i >> (j-1)) & 1) istate.set(j,"Up");
        V1.push_back(MPS(istate));
        }
    MPS bSpaceL(hsps[0]);
    MPS bSpaceR(hsps[0]);
    makeVS(V1,bSpaceL,LEFT);
    makeVS(V1,bSpaceR,RIGHT);

    // Hamiltonian parameters
    const double Gamma = 2.0;
    vector<double> J(2*(N-1));
    fprintf(stdout,"# Hamiltonian terms Jx1,Jy1,Jx2,... (seed=%d)\n",seed);
    for(int i = 0 ; i < N-1 ; ++i) {
        J[2*i+0] = pow(udist(gen),Gamma);
        J[2*i+1] = pow(udist(gen),Gamma);
        fprintf(stdout,"%16.14f,%16.14f",J[2*i],J[2*i+1]);
        if(i != N-2) fprintf(stdout,",");
        }
    fprintf(stdout,"\n");
    fflush(stdout);

    // initialize H for full system and extract block Hamiltonians
    // (std::cout is redirected into sts while the MPO is built, then restored)
    AutoMPO autoH(hs);
    std::stringstream sts;
    auto out = std::cout.rdbuf(sts.rdbuf());
    vector<vector<MPO> > Hs(hsps.size());
    for(int i = 1 ; i < N ; ++i) {
        autoH += (J[2*(i-1)]-J[2*(i-1)+1]),"S+",i,"S+",i+1;
        autoH += (J[2*(i-1)]-J[2*(i-1)+1]),"S-",i,"S-",i+1;
        autoH += (J[2*(i-1)]+J[2*(i-1)+1]),"S+",i,"S-",i+1;
        autoH += (J[2*(i-1)]+J[2*(i-1)+1]),"S-",i,"S+",i+1;
        }
    auto H = toMPO<ITensor>(autoH,{"Exact",true});
    std::cout.rdbuf(out);

    for(auto i : args(hsps)) extractBlocks(autoH,Hs[i],hsps[i]);
    
    // per-level product operators and the projectors (Id +/- op)/2 built from them
    vector<MPO> prodSz,prodSx,projSzUp,projSzDn,projSxUp,projSxDn;
    for(auto& it : hsps) { 
        auto curSz = sysOp(it,"Sz",2.0).toMPO(); prodSz.push_back(curSz);
        auto curSx = sysOp(it,"Sx",2.0).toMPO(); prodSx.push_back(curSx);
        auto curSzUp = sysOp(it,"Id").toMPO(); curSzUp.plusEq(curSz); curSzUp /= 2.0;
        auto curSzDn = sysOp(it,"Id").toMPO(); curSzDn.plusEq(-1.0*curSz); curSzDn /= 2.0;
        auto curSxUp = sysOp(it,"Id").toMPO(); curSxUp.plusEq(curSx); curSxUp /= 2.0;
        auto curSxDn = sysOp(it,"Id").toMPO(); curSxDn.plusEq(-1.0*curSx); curSxDn /= 2.0;
        projSzUp.push_back(curSzUp); projSzDn.push_back(curSzDn);
        projSxUp.push_back(curSxUp); projSxDn.push_back(curSxDn);
        }   
 
    // approximate the thermal operator exp(-H/t)^k using Trotter
    // and MPO multiplication; temperature of K is k/t
    time(&tI);
    MPO eH(hs);
    twoLocalTrotter(eH,t,M,autoH);
    auto K = eH;    
    for(int i = 1 ; i < k ; ++i) {
        nmultMPO(eH,K,K,{"Cutoff",eps,"Maxm",MAXBD});
        K.Aref(1) *= 1.0/norm(K.A(1));
        }
    
    // INITIALIZATION: reduce dimension by sampling from initial basis, either
    // bSpaceL or bSpaceR depending on how the merge will work
    vector<MPS> Spre;
    for(ll = 0 ; ll < N/n ; ll++) {
        auto xs = ll % 2 ? 1 : n; // location of dangling Select index
        auto cur = ll % 2 ? bSpaceR : bSpaceL;
        Index si("ext",s,Select);
       
        // return orthonormal basis of evecs
        auto eigs = diagHermitian(-overlapT(cur,Hs[0][ll],cur),P,S,{"Maxm",s});
        cur.Aref(xs) *= P*delta(commonIndex(P,S),si);
        regauge(cur,xs,{"Truncate",false});

        Spre.push_back(cur);
        }
    time(&t2);
    fprintf(stderr,"initialization: %.f s\n",difftime(t2,tI));

    // ITERATION: proceed through RRG hierarchy, increasing the scale m
    vector<MPS> Spost;
    for(m = 0 ; (int)Spre.size() > 1 ; ++m,w*=2) {
        fprintf(stderr,"Level %d (w = %d)\n",m,w);
        auto hs = hsps[m];
        auto DD = D;//max(4,D/(int(log2(N/n)-m)));
        auto thr = 1e-8;
        Spost.clear();

        // EXPAND STEP: for each block, expand dimension of subspace with AGSP operators
        for(ll = 0 ; ll < N/w ; ++ll) {
            MPO A(hs) , Hc = Hs[m][ll];
            MPS pre = Spre[ll] , ret(hs);
            int xs = ll % 2 ? 1 : w;

            // STEP 1: extract filtering operators A from AGSP K
            time(&t1);
            restrictMPO(K,A,w*ll+1,DD,ll%2);
            time(&t2);
            fprintf(stderr,"trunc AGSP: %.f s\n",difftime(t2,t1));

            // STEP 2: expand subspace using the mapping A:pre->ret
            time(&t1);
            ret = applyMPO(A,pre,ll%2,{"Cutoff",eps,"Maxm",MAXBD});
            time(&t2);
            fprintf(stderr,"apply AGSP: %.f s\n",difftime(t2,t1));

            // rotate into principal components of subspace, possibly reducing dimension
            // and stabilizing numerics, then store subspace in eigenbasis of block H
            time(&t1); 
            diagHermitian(overlapT(ret,ret),U,Dg,{"Cutoff",thr});
            time(&t2);
            ei = Index("ext",int(commonIndex(Dg,U)),Select);
            Dg.apply(invsqrt);
            ret.Aref(xs) *= dag(U)*Dg*delta(prime(commonIndex(Dg,U)),ei);
            fprintf(stderr,"rotate MPS: %.f s\n",difftime(t2,t1));

            auto eigs = diagHermitian(-overlapT(ret,Hs[m][ll],ret),P,S);
            ret.Aref(xs) *= P*delta(commonIndex(P,S),ei);
            ret.Aref(xs) *= 1.0/sqrt(overlapT(ret,ret).real(ei(1),prime(ei)(1)));
            regauge(ret,xs,{"Cutoff",eps});

            fprintf(stderr,"max m: %d\n",maxM(ret));
            Spost.push_back(ret);
            
            }

        // MERGE/REDUCE STEP: construct tensor subspace, sample to reduce dimension
        Spre.clear();
        for(ll = 0 ; ll < N/w ; ll+=2) {
            auto spL = Spost[ll];                // L subspace
            auto spR = Spost[ll+1];              // R subspace

            // STEP 1: find s lowest eigenpairs of restricted H
            time(&t1);
            auto tpH = tensorProdContract(spL,spR,Hs[m+1][ll/2]);
            tensorProdH<ITensor> resH(tpH);
            resH.diag(s,doI);
            P = resH.eigenvectors();
            time(&t2);
            fprintf(stderr,"diag restricted H: %.f s\n",difftime(t2,t1));

            // STEP 2: tensor viable sets on each side and reduce dimension
            MPS ret(hsps[m+1]);
            time(&t1);
            tensorProduct(spL,spR,ret,P,(ll/2)%2);
            time(&t2);
            fprintf(stderr,"tensor product (ll=%d): %.f s\n",ll,difftime(t2,t1));
 
            Spre.push_back(ret);
            }
        }

    // EXIT: extract two lowest energy candidate states to determine gap
    auto res = Spre[0];
    auto fi = Index("ext",s/2,Select);
    vector<MPS> resSz = {res,res};
    
    // project to Sz sectors of the eigenspace
    diagHermitian(overlapT(res,prodSz[m],res),U,Dg);
    resSz[0].Aref(N) *= U*delta(commonIndex(U,Dg),fi);
    diagHermitian(overlapT(res,-1.0*prodSz[m],res),U,Dg);
    resSz[1].Aref(N) *= U*delta(commonIndex(U,Dg),fi);
   
    vector<MPS> evecs(2);
    for(int i : range(2)) {
        auto fc = resSz[i];
        
        // diagonalize H within the Sz sectors
        auto eigs = diagHermitian(-overlapT(fc,H,fc),P,S);
        fc.Aref(N) *= (P*setElt(commonIndex(P,S)(1)));
        
        fc.orthogonalize({"Cutoff",epx,"Maxm",MAXBD});
        fc.normalize();
        if(i == 0)
            fprintf(stderr,"RRG gs energy: %17.14f\n",overlap(fc,H,fc));
        evecs[i] = fc;
        }
    time(&t2);

    Real vz,vx;
    vz = overlap(evecs[0],prodSz[m],evecs[0]); vx = overlap(evecs[0],prodSx[m],evecs[0]);
    fprintf(stderr,"Vz,vx of 0 is: %17.14f,%17.14f\n",vz,vx);
    vz = overlap(evecs[1],prodSz[m],evecs[1]); vx = overlap(evecs[1],prodSx[m],evecs[1]);
    fprintf(stderr,"Vz,vx of 1 is: %17.14f,%17.14f\n",vz,vx);
    // the Sx projector for state 1 is chosen from the sign of its measured vx
    int x1_up = (vx > 0.0 ? 1 : 0);

    evecs[0] = exactApplyMPO(evecs[0],projSzUp[m],{"Cutoff",epx});
    evecs[0] = exactApplyMPO(evecs[0],projSxUp[m],{"Cutoff",epx});
    evecs[1] = exactApplyMPO(evecs[1],projSzDn[m],{"Cutoff",epx});
    evecs[1] = exactApplyMPO(evecs[1],(x1_up?projSxUp[m]:projSxDn[m]),{"Cutoff",epx});
    for(auto& it : evecs) it.normalize();

    fprintf(stderr,"gs candidate energy: %17.14f\nRRG BD ",overlap(evecs[0],H,evecs[0]));
    for(const auto& it : evecs) fprintf(stderr,"%d ",maxM(it));
    fprintf(stderr,"\telapsed: %.f s\n",difftime(t2,tI));

    // CLEANUP: use DMRG to improve discovered evecs
    vector<Real> evals(2),e_prev(2);
    int max_iter = 30 , used_max = 0;
    Real flr = 1e-13 , over_conv = 1e-1 , gap = 1.0 , conv = over_conv*gap , max_conv = 1.0;
    for(int i = 0 ; i < (int)evecs.size() ; ++i) evals[i] = overlap(evecs[i],H,evecs[i]);
    for(int i = 0 ; (i < 2 || conv < max_conv) && i < max_iter ; ++i) {
        e_prev = evals;

        time(&t1);
        evals = dmrgMPO(H,evecs,8,{"Penalty",0.1,"Cutoff",epx});
        time(&t2);
        
        gap = evals[1]-evals[0];

        // largest absolute energy change of either state in this sweep
        max_conv = 0.0;
        for(auto& j : range(2)) {
            const Real change = fabs(e_prev[j]-evals[j]);
            if(change > max_conv) max_conv = change;
            }
        
        fprintf(stderr,"DMRG BD ");
        for(const auto& it : evecs) fprintf(stderr,"%3d ",maxM(it));
        fprintf(stderr,"\tgap: %e\tconv=%9.2e,%9.2e\telapsed: %.f s\n",gap,
            e_prev[0]-evals[0],e_prev[1]-evals[1],difftime(t2,t1));
        conv = max(over_conv*gap,flr);
        // the last permitted sweep has index max_iter-1
        if(i == max_iter-1) used_max = 1;
        }

    for(int i = 0 ; i < (int)evecs.size() ; ++i) {
        vz = overlap(evecs[i],prodSz[m],evecs[i]); vx = overlap(evecs[i],prodSx[m],evecs[i]);
        fprintf(stderr,"Vz,vx of %d is: %12.9f,%12.9f\n",i,vz,vx);
        }

    evecs[0] = exactApplyMPO(evecs[0],projSzUp[m],{"Cutoff",1e-16});
    evecs[0] = exactApplyMPO(evecs[0],projSxUp[m],{"Cutoff",1e-16});
    evecs[1] = exactApplyMPO(evecs[1],projSzDn[m],{"Cutoff",1e-16});
    evecs[1] = exactApplyMPO(evecs[1],(x1_up?projSxUp[m]:projSxDn[m]),{"Cutoff",1e-16});
    for(auto& it : evecs) it.normalize();
    for(auto i : range(evecs.size())) evals[i] = overlap(evecs[i],H,evecs[i]);
    time(&tF);

    auto gsR = evecs[0];
    auto ee = measEE(gsR,N/2);
    gap = evals[1]-evals[0];

    fprintf(stderr,"gs: %17.14f gap: %15.9e ee: %10.8f\n",evals[0],gap,ee);
    fprintf(gsfl,"# GS data (L=%d s=%d D=%d seed=%d time=%.f)\n",N,s,D,seed,difftime(tF,tI));
    if(used_max) fprintf(gsfl,"# WARNING max iterations reached\n");
    fprintf(gsfl,"%17.14f\t%15.9e\t%10.8f\n",evals[0],gap,ee);

    // Compute two-point correlation functions in ground state via usual MPS method
    // (only entries with j > i are measured; the rest are written as 0)
    fprintf(sxfl,"# SxSx corr matrix (L=%d s=%d D=%d seed=%d)\n",N,s,D,seed);
    fprintf(syfl,"# SySy corr matrix (L=%d s=%d D=%d seed=%d)\n",N,s,D,seed);
    fprintf(szfl,"# SzSz corr matrix (L=%d s=%d D=%d seed=%d)\n",N,s,D,seed);
    for(int i = 1 ; i <= N ; ++i) {
        gsR.position(i,{"Cutoff",0.0});
        auto SxA = hs.op("Sx",i); auto SyA = hs.op("Sy",i); auto SzA = hs.op("Sz",i);
        for(int j = 1 ; j <= N ; ++j) {
            if(j <= i) {
                fprintf(sxfl,"%15.12f\t",0.0);
                fprintf(syfl,"%15.12f\t",0.0);
                fprintf(szfl,"%15.12f\t",0.0); 
            } else {
                auto SxB = hs.op("Sx",j); auto SyB = hs.op("Sy",j); auto SzB = hs.op("Sz",j);
                fprintf(sxfl,"%15.12f\t",measOp(gsR,SxA,i,SxB,j));
                fprintf(syfl,"%15.12f\t",measOp(gsR,SyA,i,SyB,j));
                fprintf(szfl,"%15.12f\t",measOp(gsR,SzA,i,SzB,j));
                }
            }
        fprintf(sxfl,"\n");
        fprintf(syfl,"\n");
        fprintf(szfl,"\n");
        }

    fclose(sxfl);
    fclose(syfl);
    fclose(szfl);
    fclose(gsfl);

    return 0;
    
    }