//Perform the one-time setup required to work with the overlap matrix along rank opRank.
//
//On the first call (for any rank) a copy of srcPsi is stored in Psi. If opRank has not
//already been flagged in IsSetupRank, this creates the Epetra map, the overlap matrix,
//the input/output work multivectors and the Amesos solver for that rank, and then flags
//the rank as set up. Calling again for an already set up rank does nothing further.
//
//  srcPsi: wavefunction supplying the representation, overlap data and data shape
//  opRank: rank to set up; must satisfy 0 <= opRank < Rank and be a non-orthogonal rank
void DistributedOverlapMatrix<Rank>::SetupRank(Wavefunction<Rank> &srcPsi, int opRank)
{
	//Sanity check: operation rank must be non-negative and less than the rank of the
	//wavefunction. Checked up front, before opRank is used to index any per-rank array.
	assert(opRank > -1);
	assert(opRank < Rank);

	//Need a copy of srcPsi for future reference (only on first call to this function)
	if (!HasPsi)
	{
		Psi = srcPsi.Copy();
		HasPsi = true;
	}

	//Check that distribution for opRank has not changed since last call.
	//Also check that typeID of representation is the same
	//(this check is currently disabled)
/*	int curDistribOpRank = Psi->GetRepresentation()->GetDistributedModel()->GetDistribution()(opRank);
	int srcDistribOpRank = srcPsi.GetRepresentation()->GetDistributedModel()->GetDistribution()(opRank);
	if ( (curDistribOpRank != srcDistribOpRank) )
	{
		Psi = srcPsi.Copy();

		//NB: We reset IsSetup flag for _all_ ranks!
		IsSetupRank = false;
	}
*/
	if (!IsSetupRank(opRank))
	{
		//Assert non-orthogonal rank opRank
		assert (!srcPsi.GetRepresentation()->IsOrthogonalBasis(opRank));

		//Create Epetra map for this rank
		//NB: the map is built from the stored copy Psi, not from srcPsi
		WavefunctionMaps(opRank) = CreateWavefunctionMultiVectorEpetraMap<Rank>(Psi, opRank);

		//Setup overlap matrix (requires WavefunctionMaps(opRank) to exist)
		SetupOverlapMatrixRank(srcPsi, opRank);

		//Setup work multivectors. The number of vectors is derived from the first and last
		//extents of the rank-3 view of the wavefunction data.
		//NOTE(review): the factor 2 presumably accounts for real and imaginary parts - confirm
		blitz::Array<cplx, 3> psiData = MapToRank3(srcPsi.GetData(), opRank, 1);
		int numVectors = 2 * psiData.extent(0) * psiData.extent(2);
		InputVector(opRank) = Epetra_MultiVector_Ptr( new Epetra_MultiVector(*WavefunctionMaps(opRank), numVectors, false) );
		OutputVector(opRank) = Epetra_MultiVector_Ptr( new Epetra_MultiVector(*WavefunctionMaps(opRank), numVectors, false) );

		//Allocate mem for multivectors
		//InputData.resize(srcPsi.GetData().size(), 1);
		//OutputData.resize(srcPsi.GetData().size(), 1);

		//Setup Amesos solvers (requires the overlap matrix and work multivectors above)
		SetupOverlapSolvers(srcPsi, opRank);

		//Flag this rank as set up
		IsSetupRank(opRank) = true;
	}
}
//Prepare the propagator for the given rank: stores the parameters, obtains an
//eigenvector decomposition through setup(), and builds the full propagation and
//differentiation matrices from it.
//
//  param: parameters stored in Param and forwarded to setup()
//  dt:    (complex) time step entering the exponential scaling of the eigenvectors
//  psi:   wavefunction; only the full shape of its representation along rank is read
//  rank:  rank this propagator acts on
void Propagator<Rank>::Setup(const Parameter &param, const cplx &dt, const Wavefunction<Rank> &psi, int rank)
{
	//Store class parameters
	PropagateRank = rank;
	N = psi.GetRepresentation()->GetFullShape()(rank);
	Param = param;

	//Call setup routines to get the eigenvector decomposition. The inverse
	//eigenvector matrix is returned as a real temporary.
	Array<double, 2> invEigenvectorsReal;
	setup(N, param, Eigenvalues, Eigenvectors, invEigenvectorsReal);

	//The inverse eigenvector matrix is needed in complex format
	Array<cplx, 2> invEigenvectors(Eigenvectors.shape());
	invEigenvectors = invEigenvectorsReal(tensor::i, tensor::j);

	//Propagation matrix: eigenvectors with column j scaled by a complex rotation of eigenvalue j,
	//multiplied by the inverse eigenvector matrix.
	//The missing minus sign in the exponent is included in the matrix.
	Array<cplx, 2> expScaled(Eigenvectors.shape());
	expScaled = Eigenvectors(tensor::i, tensor::j) * exp( I * dt * Eigenvalues(tensor::j) / (2.0 * Mass));
	PropagationMatrix.resize(Eigenvectors.shape());
	MatrixMatrixMultiply(expScaled, invEigenvectors, PropagationMatrix);

	//Differentiation matrix: eigenvectors with column j scaled by eigenvalue j,
	//multiplied by the inverse eigenvector matrix
	Array<cplx, 2> valueScaled(Eigenvectors.shape());
	valueScaled = - Eigenvectors(tensor::i, tensor::j) * Eigenvalues(tensor::j) / (2.0 * Mass);
	DiffMatrix.resize(Eigenvectors.shape());
	MatrixMatrixMultiply(valueScaled, invEigenvectors, DiffMatrix);

	//Allocate temp data (one element per eigenvalue)
	TempData.resize(Eigenvalues.extent(0));
}
//Create and factorize the Amesos linear solver for the overlap matrix along opRank.
//
//Requires OverlapMatrices(opRank), InputVector(opRank) and OutputVector(opRank) to be
//set up already. Stores the linear problem in EpetraProblems(opRank) and the solver in
//Solvers(opRank).
//
//  psi:    wavefunction whose distributed model decides which solver type is used
//  opRank: rank to create the solver for
//
//Throws std::runtime_error if the requested solver is not available in the Amesos
//build, or if the symbolic or numeric factorization reports an error.
void DistributedOverlapMatrix<Rank>::SetupOverlapSolvers(Wavefunction<Rank> &psi, int opRank)
{
	//Set up Epetra LinearProblem with overlap for this rank and input/output multivectors.
	//OutputVector is passed as the solution vector and InputVector as the right-hand side.
	EpetraProblems(opRank) = Epetra_LinearProblem_Ptr( new Epetra_LinearProblem(OverlapMatrices(opRank).get(), OutputVector(opRank).get(), InputVector(opRank).get()) );

	//Determine solver type. Use SuperLU_dist if opRank is distributed, else KLU
	Amesos Factory;
	std::string SolverType;
	if (psi.GetRepresentation()->GetDistributedModel()->IsDistributedRank(opRank))
	{
		SolverType = "Amesos_Superludist";
	}
	else
	{
		SolverType = "Amesos_Klu";
	}

	//Create Amesos solver for this rank
	Solvers(opRank) = Amesos_BaseSolver_Ptr( Factory.Create(SolverType, *EpetraProblems(opRank)) );

	//Check that requested solver exists in Amesos build
	if (Solvers(opRank) == 0)
	{
		throw std::runtime_error("Specified Amesos solver not available");
	}

	//Setup the parameter list for the solver (TODO: get these from Python config section)
	Teuchos::ParameterList List;
	List.set("MatrixType", "symmetric");
	List.set("PrintTiming", false);
	List.set("PrintStatus", false);
	List.set("ComputeTrueResidual", false);
	Solvers(opRank)->SetParameters(List);

	//Perform symbolic and numeric factorization up front. Amesos reports success as 0;
	//a failed factorization would leave the solver unusable, so fail loudly here.
	if (Solvers(opRank)->SymbolicFactorization() != 0)
	{
		throw std::runtime_error("Symbolic factorization of overlap matrix failed");
	}
	if (Solvers(opRank)->NumericFactorization() != 0)
	{
		throw std::runtime_error("Numeric factorization of overlap matrix failed");
	}
}
//Build the Epetra CrsMatrix holding the overlap matrix along opRank and store it in
//OverlapMatrices(opRank).
//
//Requires WavefunctionMaps(opRank) to be created already. Only the rows between this
//processor's smallest and largest global index in that map are filled in.
//
//  srcPsi: wavefunction whose representation supplies the global overlap matrix
//  opRank: rank to build the overlap matrix for
//
//Throws std::runtime_error if Epetra reports an error (negative return code) while
//inserting values or completing the matrix.
void DistributedOverlapMatrix<Rank>::SetupOverlapMatrixRank(Wavefunction<Rank> &srcPsi, int opRank)
{
	//Get overlap matrix for opRank (and full col data)
	OverlapMatrix::Ptr overlap = srcPsi.GetRepresentation()->GetGlobalOverlapMatrix(opRank);
	blitz::Array<double, 2> overlapFullCol = overlap->GetOverlapFullCol();

	//Overlap row size
	int numSuperDiagonals = overlap->GetSuperDiagonals();
	int colSizeFull = overlap->GetBasisSize();

	//Epetra CrsMatrix for opRank overlap
	//cout << "Allocating Epetra CrsMatrix " << numSuperDiagonals << endl;
	//int numTotalBands = 2 * numSuperDiagonals - 1;
	//Epetra_CrsMatrix_Ptr overlapMatrix = Epetra_CrsMatrix_Ptr( new Epetra_CrsMatrix(Copy, *wavefunctionMap, numTotalBands, true) );

	//Calculate the number of elements per (proc local) row
	//NOTE(review): this assumes the global row indices owned by this proc are contiguous - confirm
	int globalStartRow = WavefunctionMaps(opRank)->MinMyGID();
	int globalEndRow = WavefunctionMaps(opRank)->MaxMyGID();
	int numRowsProc = globalEndRow - globalStartRow + 1;
	blitz::Array<int, 1> RowLengths;
	RowLengths.resize(numRowsProc);
	for (int i=globalStartRow; i<=globalEndRow; i++)
	{
		//Columns [startCol, endCol) are inside the band for row i; endCol is exclusive,
		//so the row holds exactly endCol - startCol entries (matching the insertion loop below)
		int startCol = std::max(i - numSuperDiagonals, 0);
		int endCol = std::min(i + numSuperDiagonals + 1, colSizeFull);
		RowLengths(i - globalStartRow) = endCol - startCol;
	}

	OverlapMatrices(opRank) = Epetra_CrsMatrix_Ptr( new Epetra_CrsMatrix(Copy, *WavefunctionMaps(opRank), RowLengths.data(), true) );

	//Debug output from Epetra
	//NOTE(review): Epetra documents traceback modes 0-2; confirm that 4 is intended
	OverlapMatrices(opRank)->SetTracebackMode(4);

	/*int procId = 0;
	int procCount = 0;
	MPI_Comm_rank(MPI_COMM_WORLD, &procId);
	MPI_Comm_size(MPI_COMM_WORLD, &procCount);
	for (int i=0; i<procCount; i++)
	{
		if (procId == i)
		{
			cout << "Processor " << i << endl;
			cout << "Wavefunction Map " << *WavefunctionMaps(opRank) << endl;
			cout << endl;
		}
	}*/

	//Copy overlap slice corresponding to this proc into CrsMatrix. Since we have the entire overlap
	//matrix (for opRank) on every proc, global and local indices are the same. We only have to
	//determine start and end of row slice for this proc, and the first column index (banded case).
	for (int i=globalStartRow; i<=globalEndRow; i++)
	{
		int startCol = std::max(i - numSuperDiagonals, 0);
		int endCol = std::min(i + numSuperDiagonals + 1, colSizeFull);
		for (int j=startCol; j<endCol; j++)
		{
			//Column of the banded storage holding element (i, j); the diagonal
			//(j == i) sits at index numSuperDiagonals
			int bandIdx = j - i + numSuperDiagonals;

			//Epetra signals errors with negative return codes (positive codes are warnings)
			int insertStatus = OverlapMatrices(opRank)->InsertGlobalValues(i, 1, &overlapFullCol(i, bandIdx), &j);
			if (insertStatus < 0)
			{
				throw std::runtime_error("Failed to insert overlap matrix element into Epetra CrsMatrix");
			}
		}
	}

	//Signal end of matrix input
	if (OverlapMatrices(opRank)->FillComplete() < 0)
	{
		throw std::runtime_error("FillComplete failed for overlap Epetra CrsMatrix");
	}
}