Exemple #1
0
double 
checkeig_ext (
    double *err,
    double *work,			/* work vector of length n */
    struct vtx_data **A,
    double *y,
    int n,
    double extval,
    double *vwsqrt,
    double *gvec,
    double eigtol,
    int warnings		/* don't want to see warning messages in one of the
				   contexts this is called */
)
{
    extern FILE *Output_File;	/* output file or null */
    extern int DEBUG_EVECS;	/* print debugging output? */
    extern int WARNING_EVECS;	/* print warning messages? */
    double    resid;		/* the extended eigen residual */
    double    ch_norm();		/* vector norm */
    void      splarax();	/* sparse matrix vector mult */
    void      scadd();		/* scaled vector add */
    void      scale_diag();	/* scale vector by another's elements */
    void      cpvec();		/* vector copy */

    splarax(err, A, n, y, vwsqrt, work);
    scadd(err, 1, n, -extval, y);
    cpvec(work, 1, n, gvec);	/* only need if going to re-use gvec */
    scale_diag(work, 1, n, vwsqrt);
    scadd(err, 1, n, -1.0, work);
    resid = ch_norm(err, 1, n);

    if (DEBUG_EVECS > 0) {
	printf("  extended residual: %g\n", resid);
	if (Output_File != NULL) {
	    fprintf(Output_File, "  extended residual: %g\n", resid);
	}
    }
    if (warnings && WARNING_EVECS > 0 && resid > eigtol) {
	printf("WARNING: Extended residual (%g) greater than tolerance (%g).\n",
	       resid, eigtol);
	if (Output_File != NULL) {
	    fprintf(Output_File,
		  "WARNING: Extended residual (%g) greater than tolerance (%g).\n",
		    resid, eigtol);
	}
    }

    return (resid);
}
Exemple #2
0
/* Check an eigenpair of A by direct multiplication.  */
double 
checkeig (double *err, struct vtx_data **A, double *y, int n, double lambda, double *vwsqrt, double *work)
{
    double    resid;
    double    normy;
    double    ch_norm();
    void      splarax(), scadd();

    splarax(err, A, n, y, vwsqrt, work);
    scadd(err, 1, n, -lambda, y);
    normy = ch_norm(y, 1, n);
    resid = ch_norm(err, 1, n) / normy;
    return (resid);
}
void 
orthogvec (
    double *vec1,			/* vector to be orthogonalized */
    int beg,
    int end,		/* start and stop range for vector */
    double *vec2			/* vector to be orthogonalized against */
)
{
    double    alpha;
    double    dot();
    void      scadd();

    alpha = -dot(vec1, beg, end, vec2) / dot(vec2, beg, end, vec2);
    scadd(vec1, beg, end, alpha, vec2);
}
Exemple #4
0
void 
sorthog (
    double *vec,			/* vector to be orthogonalized */
    int n,			/* length of the columns of orth */
    struct orthlink **solist,	/* set of vecs to orth. against */
    int ngood		/* number of vecs in solist */
)
{
    double    alpha;
    double   *dir;
    double    dot();
    void      scadd();
    int       i;

    for (i = 1; i <= ngood; i++) {
	dir = (solist[i])->vec;
	alpha = -dot(vec, 1, n, dir) / dot(dir, 1, n, dir);
	scadd(vec, 1, n, alpha, dir);
    }
}
Exemple #5
0
void 
coarsen (
/* Coarsen until nvtxs <= vmax, compute and uncoarsen. */
    struct vtx_data **graph,	/* array of vtx data for graph */
    int nvtxs,		/* number of vertices in graph */
    int nedges,		/* number of edges in graph */
    int using_vwgts,		/* are vertices weights being used? */
    int using_ewgts,		/* are edge weights being used? */
    float *term_wgts[],		/* terminal weights */
    int igeom,		/* dimension for geometric information */
    float **coords,		/* coordinates for vertices */
    double **yvecs,		/* eigenvectors returned */
    int ndims,		/* number of eigenvectors to calculate */
    int solver_flag,		/* which eigensolver to use */
    int vmax,			/* largest subgraph to stop coarsening */
    double eigtol,		/* tolerence in eigen calculation */
    int nstep,		/* number of coarsenings between RQI steps */
    int step,			/* current step number */
    int give_up		/* has coarsening bogged down? */
)
{
    extern FILE *Output_File;	/* output file or null */
    extern int DEBUG_COARSEN;	/* debug flag for coarsening */
    extern int PERTURB;		/* was matrix perturbed in Lanczos? */
    extern double COARSEN_RATIO_MIN;	/* min vtx reduction for coarsening */
    extern int COARSEN_VWGTS;	/* use vertex weights while coarsening? */
    extern int COARSEN_EWGTS;	/* use edge weights while coarsening? */
    extern double refine_time;	/* time for RQI/Symmlq iterative refinement */
    struct vtx_data **cgraph;	/* array of vtx data for coarsened graph */
    struct orthlink *orthlist;	/* list of lower evecs to suppress */
    struct orthlink *newlink;	/* lower evec to suppress */
    double   *cyvecs[MAXDIMS + 1];	/* eigenvectors for subgraph */
    double    evals[MAXDIMS + 1];	/* eigenvalues returned */
    double    goal[MAXSETS];	/* needed for convergence mode = 1 */
    double   *r1, *r2, *work;	/* space needed by symmlq/RQI */
    double   *v, *w, *x, *y;	/* space needed by symmlq/RQI */
    double   *gvec;		/* rhs vector in extended eigenproblem */
    double    evalest;		/* eigenvalue estimate returned by RQI */
    double    maxdeg;		/* maximum weighted degree of a vertex */
    float   **ccoords;		/* coordinates for coarsened graph */
    float    *cterm_wgts[MAXSETS];	/* coarse graph terminal weights */
    float    *new_term_wgts[MAXSETS];	/* terminal weights for Bui's method*/
    float   **real_term_wgts;	/* one of the above */
    float    *twptr;		/* loops through term_wgts */
    float    *twptr_save;	/* copy of twptr */
    float    *ctwptr;		/* loops through cterm_wgts */
    double   *vwsqrt = NULL;	/* square root of vertex weights */
    double    norm, alpha;	/* values used for orthogonalization */
    double    initshift;	/* initial shift for RQI */
    double    total_vwgt;	/* sum of all the vertex weights */
    double    w1, w2;		/* weights of two sets */
    double    sigma;		/* norm of rhs in extended eigenproblem */
    double    term_tot;		/* sum of all terminal weights */
    int    *space;		/* room for assignment in Lanczos */
    int      *morespace;	/* room for assignment in Lanczos */
    int      *v2cv;		/* mapping from vertices to coarse vtxs */
    int       vwgt_max;		/* largest vertex weight */
    int       oldperturb;	/* saves PERTURB value */
    int       cnvtxs;		/* number of vertices in coarsened graph */
    int       cnedges;		/* number of edges in coarsened graph */
    int       nextstep;		/* next step in RQI test */
    int       nsets;		/* number of sets being created */
    int       i, j;		/* loop counters */
    double    time;		/* time marker */

    double   dot(), ch_normalize(), find_maxdeg(), seconds();
    struct orthlink *makeorthlnk();
    void      makevwsqrt(), eigensolve(), coarsen1(), orthogvec(), rqi_ext();
    void      ch_interpolate(), orthog1(), rqi(), scadd(), free_graph();

    if (DEBUG_COARSEN > 0) {
	printf("<Entering coarsen, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n",
	       step, nvtxs, nedges, vmax);
    }

    nsets = 1 << ndims;

    /* Is problem small enough to solve? */
    if (nvtxs <= vmax || give_up) {
	if (using_vwgts) {
	    vwsqrt = smalloc((nvtxs + 1) * sizeof(double));
	    makevwsqrt(vwsqrt, graph, nvtxs);
	}
	else
	    vwsqrt = NULL;
	maxdeg = find_maxdeg(graph, nvtxs, using_ewgts, (float *) NULL);

	if (using_vwgts) {
	    vwgt_max = 0;
	    total_vwgt = 0;
	    for (i = 1; i <= nvtxs; i++) {
		if (graph[i]->vwgt > vwgt_max)
		    vwgt_max = graph[i]->vwgt;
		total_vwgt += graph[i]->vwgt;
	    }
	}
	else {
	    vwgt_max = 1;
	    total_vwgt = nvtxs;
	}
	for (i = 0; i < nsets; i++)
	    goal[i] = total_vwgt / nsets;

	space = smalloc((nvtxs + 1) * sizeof(int));

	/* If not coarsening ewgts, then need care with term_wgts. */
	if (!using_ewgts && term_wgts[1] != NULL && step != 0) {
	    twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float));
	    twptr_save = twptr;
	    for (j = 1; j < nsets; j++) {
	        new_term_wgts[j] = twptr;
	        twptr += nvtxs + 1;
	    }

	    for (j = 1; j < nsets; j++) {
	        twptr = term_wgts[j];
	        ctwptr = new_term_wgts[j];
	        for (i = 1; i <= nvtxs; i++) {
		    if (twptr[i] > .5) ctwptr[i] = 1;
		    else if (twptr[i] < -.5) ctwptr[i] = -1;
		    else ctwptr[i] = 0;
		}
	    }
	    real_term_wgts = new_term_wgts;
	}
	else {
	    real_term_wgts = term_wgts;
	    new_term_wgts[1] = NULL;
	}

	eigensolve(graph, nvtxs, nedges, maxdeg, vwgt_max, vwsqrt,
		   using_vwgts, using_ewgts, real_term_wgts, igeom, coords,
		   yvecs, evals, 0, space, goal,
		   solver_flag, FALSE, 0, ndims, 3, eigtol);

	if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) {
	    sfree(real_term_wgts[1]);
	}
	sfree(space);
	if (vwsqrt != NULL)
	    sfree(vwsqrt);
	return;
    }

    /* Otherwise I have to coarsen. */

    if (coords != NULL) {
	ccoords = smalloc(igeom * sizeof(float *));
    }
    else {
	ccoords = NULL;
    }
    coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges,
	     &v2cv, igeom, coords, ccoords, using_ewgts);

    /* If coarsening isn't working very well, give up and partition. */
    give_up = FALSE;
    if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax ) {
	printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
	    nvtxs, cnvtxs);
	printf("         Recursive coarsening being stopped prematurely.\n");
	if (Output_File != NULL) {
	    fprintf(Output_File,
		"WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n",
	        nvtxs, cnvtxs);
	    fprintf(Output_File,
		"         Recursive coarsening being stopped prematurely.\n");
	}
	give_up = TRUE;
    }

    /* Create space for subgraph yvecs. */
    for (i = 1; i <= ndims; i++) {
	cyvecs[i] = smalloc((cnvtxs + 1) * sizeof(double));
    }

    /* Make coarse version of terminal weights. */
    if (term_wgts[1] != NULL) {
	twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float));
	twptr_save = twptr;
	for (i = (cnvtxs + 1) * (nsets - 1); i ; i--) {
	    *twptr++ = 0;
	}
	twptr = twptr_save;
	for (j = 1; j < nsets; j++) {
	    cterm_wgts[j] = twptr;
	    twptr += cnvtxs + 1;
	}
	for (j = 1; j < nsets; j++) {
	    ctwptr = cterm_wgts[j];
	    twptr = term_wgts[j];
	    for (i = 1; i < nvtxs; i++){
	        ctwptr[v2cv[i]] += twptr[i];
	    }
	}
    }
    else {
	cterm_wgts[1] = NULL;
    }

    /* Now recurse on coarse subgraph. */
    nextstep = step + 1;
    coarsen(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts,
	    igeom, ccoords, cyvecs, ndims, solver_flag, vmax, eigtol,
	    nstep, nextstep, give_up);

    ch_interpolate(yvecs, cyvecs, ndims, graph, nvtxs, v2cv, using_ewgts);

    sfree(cterm_wgts[1]);
    sfree(v2cv);

    /* I need to do Rayleigh Quotient Iteration each nstep stages. */
    time = seconds();
    if (!(step % nstep)) {
	oldperturb = PERTURB;
	PERTURB = FALSE;
	/* Should I do some orthogonalization here against vwsqrt? */
	if (using_vwgts) {
	    vwsqrt = smalloc((nvtxs + 1) * sizeof(double));
	    makevwsqrt(vwsqrt, graph, nvtxs);

	    for (i = 1; i <= ndims; i++)
		orthogvec(yvecs[i], 1, nvtxs, vwsqrt);
	}
	else
	    for (i = 1; i <= ndims; i++)
		orthog1(yvecs[i], 1, nvtxs);

	/* Allocate space that will be needed in RQI. */
	r1 = smalloc(7 * (nvtxs + 1) * sizeof(double));
	r2 = &r1[nvtxs + 1];
	v = &r1[2 * (nvtxs + 1)];
	w = &r1[3 * (nvtxs + 1)];
	x = &r1[4 * (nvtxs + 1)];
	y = &r1[5 * (nvtxs + 1)];
	work = &r1[6 * (nvtxs + 1)];

	if (using_vwgts) {
	    vwgt_max = 0;
	    total_vwgt = 0;
	    for (i = 1; i <= nvtxs; i++) {
		if (graph[i]->vwgt > vwgt_max)
		    vwgt_max = graph[i]->vwgt;
		total_vwgt += graph[i]->vwgt;
	    }
	}
	else {
	    vwgt_max = 1;
	    total_vwgt = nvtxs;
	}
	for (i = 0; i < nsets; i++)
	    goal[i] = total_vwgt / nsets;

	space = smalloc((nvtxs + 1) * sizeof(int));
	morespace = smalloc((nvtxs) * sizeof(int));

	initshift = 0;
	orthlist = NULL;
	for (i = 1; i < ndims; i++) {
	    ch_normalize(yvecs[i], 1, nvtxs);
	    rqi(graph, yvecs, i, nvtxs, r1, r2, v, w, x, y, work,
		eigtol, initshift, &evalest, vwsqrt, orthlist,
		0, nsets, space, morespace, 3, goal, vwgt_max, ndims);

	    /* Now orthogonalize higher yvecs against this one. */
	    norm = dot(yvecs[i], 1, nvtxs, yvecs[i]);
	    for (j = i + 1; j <= ndims; j++) {
		alpha = -dot(yvecs[j], 1, nvtxs, yvecs[i]) / norm;
		scadd(yvecs[j], 1, nvtxs, alpha, yvecs[i]);
	    }

	    /* Now prepare for next pass through loop. */
	    initshift = evalest;
	    newlink = makeorthlnk();
	    newlink->vec = yvecs[i];
	    newlink->pntr = orthlist;
	    orthlist = newlink;

	}
	ch_normalize(yvecs[ndims], 1, nvtxs);

	if (term_wgts[1] != NULL && ndims == 1) {
	    /* Solve extended eigen problem */

	    /* If not coarsening ewgts, then need care with term_wgts. */
	    if (!using_ewgts && term_wgts[1] != NULL && step != 0) {
	        twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float));
	        twptr_save = twptr;
	        for (j = 1; j < nsets; j++) {
	            new_term_wgts[j] = twptr;
	            twptr += nvtxs + 1;
	        }

	        for (j = 1; j < nsets; j++) {
	            twptr = term_wgts[j];
	            ctwptr = new_term_wgts[j];
	            for (i = 1; i <= nvtxs; i++) {
		        if (twptr[i] > .5) ctwptr[i] = 1;
		        else if (twptr[i] < -.5) ctwptr[i] = -1;
		        else ctwptr[i] = 0;
		    }
	        }
	        real_term_wgts = new_term_wgts;
	    }
	    else {
	        real_term_wgts = term_wgts;
	        new_term_wgts[1] = NULL;
	    }

	    /* Following only works for bisection. */
	    w1 = goal[0];
	    w2 = goal[1];
	    sigma = sqrt(4*w1*w2/(w1+w2));
	    gvec = smalloc((nvtxs+1)*sizeof(double));
	    term_tot = sigma;	/* Avoids lint warning for now. */
	    term_tot = 0;
	    for (j=1; j<=nvtxs; j++) term_tot += (real_term_wgts[1])[j];
	    term_tot /= (w1+w2);
	    if (using_vwgts) {
	        for (j=1; j<=nvtxs; j++) {
		    gvec[j] = (real_term_wgts[1])[j]/graph[j]->vwgt - term_tot;
		}
	    }
	    else {
	        for (j=1; j<=nvtxs; j++) {
		    gvec[j] = (real_term_wgts[1])[j] - term_tot;
		}
	    }

	    rqi_ext();

	    sfree(gvec);
	    if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) {
		sfree(new_term_wgts[1]);
	    }
	}
	else {
	    rqi(graph, yvecs, ndims, nvtxs, r1, r2, v, w, x, y, work,
	        eigtol, initshift, &evalest, vwsqrt, orthlist,
	        0, nsets, space, morespace, 3, goal, vwgt_max, ndims);
	}
	refine_time += seconds() - time;

	/* Free the space allocated for RQI. */
	sfree(morespace);
	sfree(space);
	while (orthlist != NULL) {
	    newlink = orthlist->pntr;
	    sfree(orthlist);
	    orthlist = newlink;
	}
	sfree(r1);
	if (vwsqrt != NULL)
	    sfree(vwsqrt);
	PERTURB = oldperturb;
    }
    if (DEBUG_COARSEN > 0) {
	printf(" Leaving coarsen, step=%d\n", step);
    }

    /* Free the space that was allocated. */
    if (ccoords != NULL) {
	for (i = 0; i < igeom; i++)
	    sfree(ccoords[i]);
	sfree(ccoords);
    }
    for (i = ndims; i > 0; i--)
	sfree(cyvecs[i]);
    free_graph(cgraph);
}
Exemple #6
0
bool
power_iteration(double **square_mat, int n, int neigs, double **eigs,
		double *evals, bool initialize)
{
    /* compute the 'neigs' top eigenvectors of 'square_mat' using power iteration */

    int i, j;
    double *tmp_vec = N_GNEW(n, double);
    double *last_vec = N_GNEW(n, double);
    double *curr_vector;
    double len;
    double angle;
    double alpha;
    int iteration = 0;
    int largest_index;
    double largest_eval;
    int Max_iterations = 30 * n;

    double tol = 1 - p_iteration_threshold;

    if (neigs >= n) {
	neigs = n;
    }

    for (i = 0; i < neigs; i++) {
	curr_vector = eigs[i];
	/* guess the i-th eigen vector */
      choose:
	if (initialize)
	    for (j = 0; j < n; j++)
		curr_vector[j] = rand() % 100;
	/* orthogonalize against higher eigenvectors */
	for (j = 0; j < i; j++) {
	    alpha = -dot(eigs[j], 0, n - 1, curr_vector);
	    scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
	}
	len = norm(curr_vector, 0, n - 1);
	if (len < 1e-10) {
	    /* We have chosen a vector colinear with prvious ones */
	    goto choose;
	}
	vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
	iteration = 0;
	do {
	    iteration++;
	    cpvec(last_vec, 0, n - 1, curr_vector);

	    right_mult_with_vector_d(square_mat, n, n, curr_vector,
				     tmp_vec);
	    cpvec(curr_vector, 0, n - 1, tmp_vec);

	    /* orthogonalize against higher eigenvectors */
	    for (j = 0; j < i; j++) {
		alpha = -dot(eigs[j], 0, n - 1, curr_vector);
		scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
	    }
	    len = norm(curr_vector, 0, n - 1);
	    if (len < 1e-10 || iteration > Max_iterations) {
		/* We have reached the null space (e.vec. associated with e.val. 0) */
		goto exit;
	    }

	    vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
	    angle = dot(curr_vector, 0, n - 1, last_vec);
	} while (fabs(angle) < tol);
	evals[i] = angle * len;	/* this is the Rayleigh quotient (up to errors due to orthogonalization):
				   u*(A*u)/||A*u||)*||A*u||, where u=last_vec, and ||u||=1
				 */
    }
  exit:
    for (; i < neigs; i++) {
	/* compute the smallest eigenvector, which are  */
	/* probably associated with eigenvalue 0 and for */
	/* which power-iteration is dangerous */
	curr_vector = eigs[i];
	/* guess the i-th eigen vector */
	for (j = 0; j < n; j++)
	    curr_vector[j] = rand() % 100;
	/* orthogonalize against higher eigenvectors */
	for (j = 0; j < i; j++) {
	    alpha = -dot(eigs[j], 0, n - 1, curr_vector);
	    scadd(curr_vector, 0, n - 1, alpha, eigs[j]);
	}
	len = norm(curr_vector, 0, n - 1);
	vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector);
	evals[i] = 0;

    }


    /* sort vectors by their evals, for overcoming possible mis-convergence: */
    for (i = 0; i < neigs - 1; i++) {
	largest_index = i;
	largest_eval = evals[largest_index];
	for (j = i + 1; j < neigs; j++) {
	    if (largest_eval < evals[j]) {
		largest_index = j;
		largest_eval = evals[largest_index];
	    }
	}
	if (largest_index != i) {	/* exchange eigenvectors: */
	    cpvec(tmp_vec, 0, n - 1, eigs[i]);
	    cpvec(eigs[i], 0, n - 1, eigs[largest_index]);
	    cpvec(eigs[largest_index], 0, n - 1, tmp_vec);

	    evals[largest_index] = evals[i];
	    evals[i] = largest_eval;
	}
    }

    free(tmp_vec);
    free(last_vec);

    return (iteration <= Max_iterations);
}
Exemple #7
0
/* Benchmark certain kernel operations */
void 
time_kernels (
    struct vtx_data **A,		/* matrix/graph being analyzed */
    int n,			/* number of rows/columns in matrix */
    double *vwsqrt		/* square roots of vertex weights */
)
{
    extern int DEBUG_PERTURB;	/* debug flag for matrix perturbation */
    extern int PERTURB;		/* randomly perturb to break symmetry? */
    extern int NPERTURB;	/* number of edges to perturb */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern double PERTURB_MAX;	/* maximum size of perturbation */
    int       i, beg, end;
    double   *dvec1, *dvec2, *dvec3;
    float    *svec1, *svec2, *svec3, *vwsqrt_float;
    double    norm_dvec, norm_svec;
    double    dot_dvec, dot_svec;
    double    time, time_dvec, time_svec;
    double    diff;
    double    factor, fac;
    float     factor_float, fac_float;
    int       loops;
    double    min_time, target_time;

    double   *mkvec();
    float    *mkvec_float();
    void      frvec(), frvec_float();
    void      vecran();
    double    ch_norm(), dot();
    double    norm_float(), dot_float();
    double    seconds();
    void      scadd(), scadd_float(), update(), update_float();
    void      splarax(), splarax_float();
    void      perturb_init(), perturb_clear();

    if (DEBUG_TRACE > 0) {
	printf("<Entering time_kernels>\n");
    }

    beg = 1;
    end = n;

    dvec1 = mkvec(beg, end);
    dvec2 = mkvec(beg, end);
    dvec3 = mkvec(beg - 1, end);
    svec1 = mkvec_float(beg, end);
    svec2 = mkvec_float(beg, end);
    svec3 = mkvec_float(beg - 1, end);

    if (vwsqrt == NULL) {
	vwsqrt_float = NULL;
    }
    else {
        vwsqrt_float = mkvec_float(beg - 1, end);
	for (i = beg - 1; i <= end; i++) {
	    vwsqrt_float[i] = vwsqrt[i];
	}
    }

    vecran(dvec1, beg, end);
    vecran(dvec2, beg, end);
    vecran(dvec3, beg, end);
    for (i = beg; i <= end; i++) {
	svec1[i] = dvec1[i];
	svec2[i] = dvec2[i];
	svec3[i] = dvec3[i];
    }

    /* Set number of loops so that ch_norm() takes about one second. This should
       insulate against inaccurate timings on faster machines. */

    loops = 1;
    time_dvec = 0;
    min_time = 0.5;
    target_time = 1.0;
    while (time_dvec < min_time) {
	time = seconds();
	for (i = loops; i; i--) {
	    norm_dvec = ch_norm(dvec1, beg, end);
	}
	time_dvec = seconds() - time;
	if (time_dvec < min_time) {
	    loops = 10 * loops;
	}
    }
    loops = (target_time / time_dvec) * loops;
    if (loops < 1)
	loops = 1;

    printf("                Kernel benchmarking\n");
    printf("Time (in seconds) for %d loops of each operation:\n\n", loops);

    printf("Routine      Double     Float      Discrepancy      Description\n");
    printf("-------      ------     -----      -----------      -----------\n");


    /* Norm operation */
    time = seconds();
    for (i = loops; i; i--) {
	norm_dvec = ch_norm(dvec1, beg, end);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	norm_svec = norm_float(svec1, beg, end);
    }
    time_svec = seconds() - time;

    diff = norm_dvec - norm_svec;
    printf("norm        %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      2 norm\n");


    /* Dot operation */
    time = seconds();
    for (i = loops; i; i--) {
	dot_dvec = dot(dvec1, beg, end, dvec2);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	dot_svec = dot_float(svec1, beg, end, svec2);
    }
    time_svec = seconds() - time;

    diff = dot_dvec - dot_svec;
    printf("dot         %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      scalar product\n");


    /* Scadd operation */
    factor = 1.01;
    factor_float = factor;

    fac = factor;
    time = seconds();
    for (i = loops; i; i--) {
	scadd(dvec1, beg, end, fac, dvec2);
	fac = -fac;		/* to keep things in scale */
    }
    time_dvec = seconds() - time;

    fac_float = factor_float;
    time = seconds();
    for (i = loops; i; i--) {
	scadd_float(svec1, beg, end, fac_float, svec2);
	fac_float = -fac_float;	/* to keep things in scale */
    }
    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("scadd       %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      vec1 <- vec1 + alpha*vec2\n");


    /* Update operation */
    time = seconds();
    for (i = loops; i; i--) {
	update(dvec1, beg, end, dvec2, factor, dvec3);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	update_float(svec1, beg, end, svec2, factor_float, svec3);
    }
    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("update      %6.2f    %6.2f    %14.2g", time_dvec, time_svec, diff);
    printf("      vec1 <- vec2 + alpha*vec3\n");

    /* splarax operation */
    if (PERTURB) {
	if (NPERTURB > 0 && PERTURB_MAX > 0.0) {
	    perturb_init(n);
	    if (DEBUG_PERTURB > 0) {
		printf("Matrix being perturbed with scale %e\n", PERTURB_MAX);
	    }
	}
	else if (DEBUG_PERTURB > 0) {
	    printf("Matrix not being perturbed\n");
	}
    }

    time = seconds();
    for (i = loops; i; i--) {
	splarax(dvec1, A, n, dvec2, vwsqrt, dvec3);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	splarax_float(svec1, A, n, svec2, vwsqrt_float, svec3);
    }

    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("splarax     %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      sparse matrix vector multiply\n");

    if (PERTURB && NPERTURB > 0 && PERTURB_MAX > 0.0) {
	perturb_clear();
    }
    printf("\n");

    /* Free memory */
    frvec(dvec1, 1);
    frvec(dvec2, 1);
    frvec(dvec3, 0);
    frvec_float(svec1, 1);
    frvec_float(svec2, 1);
    frvec_float(svec3, 0);
    if (vwsqrt_float != NULL) {
        frvec_float(vwsqrt_float, beg - 1);
    }
}
Exemple #8
0
void 
rqi (
    struct vtx_data **A,		/* matrix/graph being analyzed */
    double **yvecs,		/* eigenvectors to be refined */
    int index,		/* index of vector in yvecs to be refined */
    int n,			/* number of rows/columns in matrix */
    double *r1,
    double *r2,
    double *v,
    double *w,
    double *x,
    double *y,
    double *work,	/* work space for symmlq */
    double tol,			/* error tolerance in eigenpair */
    double initshift,		/* initial shift */
    double *evalest,		/* returned eigenvalue */
    double *vwsqrt,		/* square roots of vertex weights */
    struct orthlink *orthlist,	/* lower evecs to orthogonalize against */
    int cube_or_mesh,		/* 0 => hypercube, d => d-dimensional mesh */
    int nsets,		/* number of sets to divide into */
    int *assignment,		/* set number of each vtx (length n+1) */
    int *active,		/* space for nvtxs integers */
    int mediantype,		/* which partitioning strategy to use */
    double *goal,			/* desired set sizes */
    int vwgt_max,		/* largest vertex weight */
    int ndims		/* dimensionality of partition */
)
{
    extern int DEBUG_EVECS;	/* debug flag for eigen computation */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern int WARNING_EVECS;	/* warning flag for eigen computation */
    extern int RQI_CONVERGENCE_MODE;	/* type of convergence monitoring to do */
    int       rqisteps;		/* # rqi rqisteps */
    double    res;		/* convergence quant for rqi */
    double    last_res;		/* res on previous rqi step */
    double    macheps;		/* machine precision calculated by symmlq */
    double    normxlim;		/* a stopping criteria for symmlq */
    double    normx;		/* norm of the solution vector */
    int       symmlqitns;	/* # symmlq itns */
    int       inv_it_steps;	/* intial steps of inverse iteration */
    long      itnmin;		/* symmlq input */
    double    shift, rtol;	/* symmlq input */
    long      precon, goodb, nout;	/* symmlq input */
    long      checka, intlim;	/* symmlq input */
    double    anorm, acond;	/* symmlq output */
    double    rnorm, ynorm;	/* symmlq output */
    long      istop, itn;	/* symmlq output */
    long      long_n;		/* copy of n for passing to symmlq */
    int       warning;		/* warning on possible misconvergence */
    double    factor;		/* ratio between previous res and new tol */
    double    minfactor;	/* minimum acceptable value of factor */
    int       converged;	/* has process converged yet? */
    double   *u;		/* name of vector being refined */
    int    *old_assignment=NULL;/* previous assignment vector */
    int    *assgn_pntr;	/* pntr to assignment vector */
    int    *old_assgn_pntr;	/* pntr to previous assignment vector */
    int       assigndiff=0;	/* discrepancies between old and new assignment */
    int       assigntol=0;	/* tolerance on convergence of assignment vector */
    int       first;		/* is this the first RQI step? */
    int       i;		/* loop index */

    double    dot(), ch_norm();
    int       symmlq_();
    void      splarax(), scadd(), vecscale(), doubleout(), assign(), x2y(), strout();


    if (DEBUG_TRACE > 0) {
	printf("<Entering rqi>\n");
    }

    /* Initialize RQI loop */
    u = yvecs[index];
    splarax(y, A, n, u, vwsqrt, r1);
    shift = dot(u, 1, n, y);
    scadd(y, 1, n, -shift, u);
    res = ch_norm(y, 1, n);	/* eigen-residual */
    rqisteps = 0;		/* a counter */
    symmlqitns = 0;		/* a counter */

    /* Set invariant symmlq parameters */
    precon = FALSE;		/* FALSE until we figure out a good way */
    goodb = TRUE;		/* should be TRUE for this application */
    nout = 0;			/* set to 0 for no Symmlq output; 6 for lots */
    checka = FALSE;		/* if don't know by now, too bad */
    intlim = n;			/* set to enforce a maximum number of Symmlq itns */
    itnmin = 0;			/* set to enforce a minimum number of Symmlq itns */
    long_n = n;			/* type change for alint */

    if (DEBUG_EVECS > 0) {
	printf("Using RQI/Symmlq refinement on graph with %d vertices.\n", n);
    }
    if (DEBUG_EVECS > 1) {
	printf("  step      lambda est.            Ares          Symmlq its.   istop  factor  delta\n");
	printf("    0");
	doubleout(shift, 1);
	doubleout(res, 1);
	printf("\n");
    }

    if (RQI_CONVERGENCE_MODE == 1) {
	assigntol = tol * n;
	old_assignment = smalloc((n + 1) * sizeof(int));
    }

    /* Perform RQI */
    inv_it_steps = 2;
    warning = FALSE;
    factor = 10;
    minfactor = factor / 2;
    first = TRUE;
    if (res < tol)
	converged = TRUE;
    else
	converged = FALSE;
    while (!converged) {
	if (res / tol < 1.2) {
	    factor = max(factor / 2, minfactor);
	}
	rtol = res / factor;

	/* exit Symmlq if iterate is this large */
	normxlim = 1.0 / rtol;

	if (rqisteps < inv_it_steps) {
	    shift = initshift;
	}

	symmlq_(&long_n, &u[1], &r1[1], &r2[1], &v[1], &w[1], &x[1], &y[1],
		work, &checka, &goodb, &precon, &shift, &nout,
		&intlim, &rtol, &istop, &itn, &anorm, &acond,
		&rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist,
		&macheps, &normxlim, &itnmin);
	symmlqitns += itn;
	normx = ch_norm(x, 1, n);
	vecscale(u, 1, n, 1.0 / normx, x);
	splarax(y, A, n, u, vwsqrt, r1);
	shift = dot(u, 1, n, y);
	scadd(y, 1, n, -shift, u);
	last_res = res;
	res = ch_norm(y, 1, n);
	if (res > last_res) {
	    warning = TRUE;
	}
	rqisteps++;

	if (res < tol)
	    converged = TRUE;

	if (RQI_CONVERGENCE_MODE == 1 && !converged && ndims == 1) {
	    if (first) {
		assign(A, yvecs, n, 1, cube_or_mesh, nsets, vwsqrt, assignment,
		       active, mediantype, goal, vwgt_max);
		x2y(yvecs, ndims, n, vwsqrt);
		first = FALSE;
		assigndiff = n;	/* dummy value for debug chart */
	    }
	    else {
		/* copy assignment to old_assignment */
		assgn_pntr = assignment;
		old_assgn_pntr = old_assignment;
		for (i = n + 1; i; i--) {
		    *old_assgn_pntr++ = *assgn_pntr++;
		}

		assign(A, yvecs, n, ndims, cube_or_mesh, nsets, vwsqrt, assignment,
		       active, mediantype, goal, vwgt_max);
		x2y(yvecs, ndims, n, vwsqrt);

		/* count differences in assignment */
		assigndiff = 0;
		assgn_pntr = assignment;
		old_assgn_pntr = old_assignment;
		for (i = n + 1; i; i--) {
		    if (*old_assgn_pntr++ != *assgn_pntr++)
			assigndiff++;
		}
		assigndiff = min(assigndiff, n - assigndiff);
		if (assigndiff <= assigntol)
		    converged = TRUE;
	    }
	}

	if (DEBUG_EVECS > 1) {
	    printf("   %2d", rqisteps);
	    doubleout(shift, 1);
	    doubleout(res, 1);
	    printf("     %3ld", itn);
	    printf("          %ld", istop);
	    printf("      %g", factor);
	    if (RQI_CONVERGENCE_MODE == 1)
		printf("     %d\n", assigndiff);
	    else
		printf("\n");
	}
    }
    *evalest = shift;

    if (WARNING_EVECS > 0 && warning) {
	strout("WARNING: Residual convergence not monotonic; RQI may have misconverged.\n");
    }

    if (DEBUG_EVECS > 0) {
	printf("Eval ");
	doubleout(*evalest, 1);
	printf("   RQI steps %d,  Symmlq iterations %d.\n\n", rqisteps, symmlqitns);
    }

    if (RQI_CONVERGENCE_MODE == 1) {
	sfree(old_assignment);
    }
}
Exemple #9
0
void
rescale_layout_polar(double *x_coords, double *y_coords,
		     double *x_foci, double *y_foci, int num_foci,
		     int n, int interval, double width,
		     double height, double margin, double distortion)
{
    // Polar distortion - main function
    int i;
    double minX, maxX, minY, maxY;
    double aspect_ratio;
    v_data *graph;
    double scaleX;
    double scale_ratio;

    width -= 2 * margin;
    height -= 2 * margin;

    // compute original aspect ratio
    minX = maxX = x_coords[0];
    minY = maxY = y_coords[0];
    for (i = 1; i < n; i++) 
	{
		if (x_coords[i] < minX)
		    minX = x_coords[i];
		if (y_coords[i] < minY)
			minY = y_coords[i];
		if (x_coords[i] > maxX)
			maxX = x_coords[i];
		if (y_coords[i] > maxY)
			maxY = y_coords[i];
    }
    aspect_ratio = (maxX - minX) / (maxY - minY);

    // construct mutual neighborhood graph
    graph = UG_graph(x_coords, y_coords, n, 0);

    if (num_foci == 1) 
	{	// accelerate execution of most common case
		rescale_layout_polarFocus(graph, n, x_coords, y_coords, x_foci[0],
				  y_foci[0], interval, distortion);
    } else
	{
	// average-based rescale
	double *final_x_coords = N_NEW(n, double);
	double *final_y_coords = N_NEW(n, double);
	double *cp_x_coords = N_NEW(n, double);
	double *cp_y_coords = N_NEW(n, double);
	for (i = 0; i < n; i++) {
	    final_x_coords[i] = final_y_coords[i] = 0;
	}
	for (i = 0; i < num_foci; i++) {
	    cpvec(cp_x_coords, 0, n - 1, x_coords);
	    cpvec(cp_y_coords, 0, n - 1, y_coords);
	    rescale_layout_polarFocus(graph, n, cp_x_coords, cp_y_coords,
				      x_foci[i], y_foci[i], interval, distortion);
	    scadd(final_x_coords, 0, n - 1, 1.0 / num_foci, cp_x_coords);
	    scadd(final_y_coords, 0, n - 1, 1.0 / num_foci, cp_y_coords);
	}
	cpvec(x_coords, 0, n - 1, final_x_coords);
	cpvec(y_coords, 0, n - 1, final_y_coords);
	free(final_x_coords);
	free(final_y_coords);
	free(cp_x_coords);
	free(cp_y_coords);
    }
    free(graph[0].edges);
    free(graph);

    minX = maxX = x_coords[0];
    minY = maxY = y_coords[0];
    for (i = 1; i < n; i++) {
	if (x_coords[i] < minX)
	    minX = x_coords[i];
	if (y_coords[i] < minY)
	    minY = y_coords[i];
	if (x_coords[i] > maxX)
	    maxX = x_coords[i];
	if (y_coords[i] > maxY)
	    maxY = y_coords[i];
    }

    // shift points:
    for (i = 0; i < n; i++) {
	x_coords[i] -= minX;
	y_coords[i] -= minY;
    }

    // rescale x_coords to maintain aspect ratio:
    scaleX = aspect_ratio * (maxY - minY) / (maxX - minX);
    for (i = 0; i < n; i++) {
	x_coords[i] *= scaleX;
    }


    // scale the layout to fit full drawing area:
    scale_ratio =
	MIN((width) / (aspect_ratio * (maxY - minY)),
	    (height) / (maxY - minY));
    for (i = 0; i < n; i++) {
	x_coords[i] *= scale_ratio;
	y_coords[i] *= scale_ratio;
    }

    for (i = 0; i < n; i++) {
	x_coords[i] += margin;
	y_coords[i] += margin;
    }
}
int 
lanczos_ext_float (
    struct vtx_data **A,		/* sparse matrix in row linked list format */
    int n,			/* problem size */
    int d,			/* problem dimension = number of eigvecs to find */
    double **y,			/* columns of y are eigenvectors of A  */
    double eigtol,		/* tolerance on eigenvectors */
    double *vwsqrt,		/* square roots of vertex weights */
    double maxdeg,		/* maximum degree of graph */
    int version,		/* flags which version of sel. orth. to use */
    double *gvec,		/* the rhs n-vector in the extended eigen problem */
    double sigma		/* specifies the norm constraint on extended
				   eigenvector */
)
{
    extern FILE *Output_File;		/* output file or null */
    extern int LANCZOS_SO_INTERVAL;	/* interval between orthogonalizations */
    extern int LANCZOS_MAXITNS;         /* maximum Lanczos iterations allowed */
    extern int DEBUG_EVECS;		/* print debugging output? */
    extern int DEBUG_TRACE;		/* trace main execution path */
    extern int WARNING_EVECS;		/* print warning messages? */
    extern double BISECTION_SAFETY;	/* safety factor for T bisection */
    extern double SRESTOL;		/* resid tol for T evec comp */
    extern double DOUBLE_EPSILON;	/* machine precision */
    extern double DOUBLE_MAX;		/* largest double value */
    extern double splarax_time; /* time matvec */
    extern double orthog_time;  /* time orthogonalization work */
    extern double evec_time;    /* time to generate eigenvectors */
    extern double ql_time;      /* time tridiagonal eigenvalue work */
    extern double blas_time;    /* time for blas. linear algebra */
    extern double init_time;    /* time to allocate, intialize variables */
    extern double scan_time;    /* time for scanning eval and bound lists */
    extern double debug_time;   /* time for (some of) debug computations */
    extern double ritz_time;    /* time to generate ritz vectors */
    extern double pause_time;   /* time to compute whether to pause */
    int       i, j, k;		/* indicies */
    int       maxj;		/* maximum number of Lanczos iterations */
    float    *u, *r;		/* Lanczos vectors */
    double   *u_double;		/* double version of u */
    double   *alpha, *beta;	/* the Lanczos scalars from each step */
    double   *ritz;		/* copy of alpha for ql */
    double   *workj;		/* work vector, e.g. copy of beta for ql */
    float    *workn;		/* work vector, e.g. product Av for checkeig */
    double   *workn_double;	/* work vector, e.g. product Av for checkeig */
    double   *s;		/* eigenvector of T */
    float   **q;		/* columns of q are Lanczos basis vectors */
    double   *bj;		/* beta(j)*(last el. of corr. eigvec s of T) */
    double    bis_safety;	/* real safety factor for T bisection */
    double    Sres;		/* how well Tevec calculated eigvec s */
    double    Sres_max;		/* Max value of Sres */
    int       inc_bis_safety;	/* need to increase bisection safety */
    double   *Ares;		/* how well Lanczos calc. eigpair lambda,y */
    int      *index;		/* the Ritz index of an eigenpair */
    struct orthlink_float **solist;	/* vec. of structs with vecs. to orthog. against */
    struct scanlink *scanlist;		/* linked list of fields to do with min ritz vals */
    struct scanlink *curlnk;		/* for traversing the scanlist */
    double    bji_tol;		/* tol on bji est. of eigen residual of A */
    int       converged;	/* has the iteration converged? */
    double    goodtol;		/* error tolerance for a good Ritz vector */
    int       ngood;		/* total number of good Ritz pairs at current step */
    int       maxngood;		/* biggest val of ngood through current step */
    int       left_ngood;	/* number of good Ritz pairs on left end */
    int       lastpause;	/* Most recent step with good ritz vecs */
    int       nopauses;		/* Have there been any pauses? */
    int       interval;		/* number of steps between pauses */
    double    time;             /* Current clock time */
    int       left_goodlim;	/* number of ritz pairs checked on left end */
    double    Anorm;		/* Norm estimate of the Laplacian matrix */
    int       pausemode;	/* which Lanczos pausing criterion to use */
    int       pause;		/* whether to pause */
    int       temp;		/* used to prevent redundant index computations */
    double   *extvec;		/* n-vector solving the extended A eigenproblem */
    double   *v;		/* j-vector solving the extended T eigenproblem */
    double    extval=0.0;	/* computed extended eigenvalue (of both A and T) */
    double   *work1, *work2;    /* work vectors */
    double    check;		/* to check an orthogonality condition */
    double    numerical_zero;	/* used for zero in presense of round-off  */
    int       ritzval_flag;	/* status flag for get_ritzvals() */
    double    resid;		/* residual */
    int       memory_ok;	/* TRUE until memory runs out */
    float    *vwsqrt_float = NULL;     /* float version of vwsqrt */

    struct orthlink_float *makeorthlnk_float();	/* makes space for new entry in orthog. set */
    struct scanlink *mkscanlist();		/* init scan list for min ritz vecs */
    double   *mkvec();			/* allocates space for a vector */
    float    *mkvec_float();		/* allocates space for a vector */
    float    *mkvec_ret_float();	/* mkvec() which returns error code */
    double    dot_float();		/* standard dot product routine */
    double    ch_norm();			/* vector norm */
    double    norm_float();		/* vector norm */
    double    Tevec();			/* calc eigenvector of T by linear recurrence */
    double    lanc_seconds();   	/* switcheable timer */
          	/* free allocated memory safely */
    int       lanpause_float();      	/* figure when to pause Lanczos iteration */
    int       get_ritzvals();   	/* compute eigenvalues of T */
    void      setvec();         	/* initialize a vector */
    void      setvec_float();         	/* initialize a vector */
    void      vecscale_float();     	/* scale a vector */
    void      splarax();        	/* matrix vector multiply */
    void      splarax_float();        	/* matrix vector multiply */
    void      update_float();         	/* add scalar multiple of a vector to another */
    void      sorthog_float();        	/* orthogonalize vector against list of others */
    void      bail();           	/* our exit routine */
    void      scanmin();        	/* store small values of vector in linked list */
    void      frvec();         		/* free vector */
    void      frvec_float();          	/* free vector */
    void      scadd();          	/* add scalar multiple of vector to another */
    void      scadd_float();          	/* add scalar multiple of vector to another */
    void      scadd_mixed();          	/* add scalar multiple of vector to another */
    void      orthog1_float();        	/* efficiently orthog. against vector of ones */
    void      solistout_float();      	/* print out orthogonalization list */
    void      doubleout();      	/* print a double precision number */
    void      orthogvec_float();      	/* orthogonalize one vector against another */
    void      double_to_float();	/* copy a double vector to a float vector */
    void      get_extval();		/* find extended Ritz values */
    void      scale_diag();		/* scale vector by diagonal matrix */
    void      scale_diag_float();	/* scale vector by diagonal matrix */
    void      strout();			/* print string to screen and file */

    if (DEBUG_TRACE > 0) {
	printf("<Entering lanczos_ext_float>\n");
    }

    if (DEBUG_EVECS > 0) {
	printf("Selective orthogonalization Lanczos for extended eigenproblem, matrix size = %d.\n", n);
    }

    /* Initialize time. */
    time = lanc_seconds();

    if (d != 1) {
	bail("ERROR: Extended Lanczos only available for bisection.",1);
        /* ... something must be wrong upstream. */
    }

    if (n < d + 1) {
	bail("ERROR: System too small for number of eigenvalues requested.",1);
	/* ... d+1 since don't use zero eigenvalue pair */
    }

    /* Allocate space. */
    maxj = LANCZOS_MAXITNS;
    u = mkvec_float(1, n);
    u_double = mkvec(1, n);
    r = mkvec_float(1, n);
    workn = mkvec_float(1, n);
    workn_double = mkvec(1, n);
    Ares = mkvec(0, d);
    index = smalloc((d + 1) * sizeof(int));
    alpha = mkvec(1, maxj);
    beta = mkvec(0, maxj);
    ritz = mkvec(1, maxj);
    s = mkvec(1, maxj);
    bj = mkvec(1, maxj);
    workj = mkvec(0, maxj);
    q = smalloc((maxj + 1) * sizeof(float *));
    solist = smalloc((maxj + 1) * sizeof(struct orthlink_float *));
    scanlist = mkscanlist(d);
    extvec = mkvec(1, n);
    v = mkvec(1, maxj);
    work1 = mkvec(1, maxj);
    work2 = mkvec(1, maxj);

    /* Set some constants governing orthogonalization */
    ngood = 0;
    maxngood = 0;
    bji_tol = eigtol;
    Anorm = 2 * maxdeg;				/* Gershgorin estimate for ||A|| */
    goodtol = Anorm * sqrt(DOUBLE_EPSILON);	/* Parlett & Scott's bound, p.224 */
    interval = 2 + (int) min(LANCZOS_SO_INTERVAL - 2, n / (2 * LANCZOS_SO_INTERVAL));
    bis_safety = BISECTION_SAFETY;
    numerical_zero = 1.0e-6;

    if (DEBUG_EVECS > 0) {
	printf("  maxdeg %g\n", maxdeg);
	printf("  goodtol %g\n", goodtol);
	printf("  interval %d\n", interval);
        printf("  maxj %d\n", maxj);
    }

    /* Make a float copy of vwsqrt */
    if (vwsqrt != NULL) {
      vwsqrt_float = mkvec_float(0,n);
      double_to_float(vwsqrt_float,1,n,vwsqrt);
    }

    /* Initialize space. */
    double_to_float(r,1,n,gvec);
    if (vwsqrt_float != NULL) {
	scale_diag_float(r,1,n,vwsqrt_float);
    }
    check = norm_float(r,1,n);
    if (vwsqrt_float == NULL) {
	orthog1_float(r, 1, n);
    }
    else { 
	orthogvec_float(r, 1, n, vwsqrt_float);
    }
    check = fabs(check - norm_float(r,1,n));
    if (check > 10*numerical_zero && WARNING_EVECS > 0) {
	strout("WARNING: In terminal propagation, rhs should have no component in the"); 
        printf("         nullspace of the Laplacian, so check val %g should be zero.\n", check); 
	if (Output_File != NULL) {
            fprintf(Output_File,
		"         nullspace of the Laplacian, so check val %g should be zero.\n",
	    check); 
	}
    }
    beta[0] = norm_float(r, 1, n);
    q[0] = mkvec_float(1, n);
    setvec_float(q[0], 1, n, 0.0);
    setvec(bj, 1, maxj, DOUBLE_MAX);

    if (beta[0] < numerical_zero) {
     /* The rhs vector, Dg, of the transformed problem is numerically zero or is
	in the null space of the Laplacian, so this is not a well posed extended
	eigenproblem. Set maxj to zero to force a quick exit but still clean-up
	memory and return(1) to indicate to eigensolve that it should call the
	default eigensolver routine for the standard eigenproblem. */
	maxj = 0;
    }
	
    /* Main Lanczos loop. */
    j = 1;
    lastpause = 0;
    pausemode = 1;
    left_ngood = 0;
    left_goodlim = 0;
    converged = FALSE;
    Sres_max = 0.0;
    inc_bis_safety = FALSE;
    nopauses = TRUE;
    memory_ok = TRUE;
    init_time += lanc_seconds() - time;
    while ((j <= maxj) && (!converged) && memory_ok) {
        time = lanc_seconds();

	/* Allocate next Lanczos vector. If fail, back up to last pause. */
	q[j] = mkvec_ret_float(1, n);
        if (q[j] == NULL) {
	    memory_ok = FALSE;
  	    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
                strout("WARNING: Lanczos_ext out of memory; computing best approximation available.\n");
            }
	    if (nopauses) {
	        bail("ERROR: Sorry, can't salvage Lanczos_ext.",1); 
  	        /* ... save yourselves, men.  */
	    }
    	    for (i = lastpause+1; i <= j-1; i++) {
	        frvec_float(q[i], 1);
    	    }
            j = lastpause;
	}

        /* Basic Lanczos iteration */
	vecscale_float(q[j], 1, n, (float)(1.0 / beta[j - 1]), r);
        blas_time += lanc_seconds() - time;
        time = lanc_seconds(); 
	splarax_float(u, A, n, q[j], vwsqrt_float, workn);
        splarax_time += lanc_seconds() - time;
        time = lanc_seconds();
	update_float(r, 1, n, u, (float)(-beta[j - 1]), q[j - 1]);
	alpha[j] = dot_float(r, 1, n, q[j]);
	update_float(r, 1, n, r, (float)(-alpha[j]), q[j]);
        blas_time += lanc_seconds() - time;

        /* Selective orthogonalization */
        time = lanc_seconds();
	if (vwsqrt_float == NULL) {
	    orthog1_float(r, 1, n);
	}
	else {
	    orthogvec_float(r, 1, n, vwsqrt_float);
	}
	if ((j == (lastpause + 1)) || (j == (lastpause + 2))) {
	    sorthog_float(r, n, solist, ngood);
	}
        orthog_time += lanc_seconds() - time;
	beta[j] = norm_float(r, 1, n);
        time = lanc_seconds();
	pause = lanpause_float(j, lastpause, interval, q, n, &pausemode, version, beta[j]);
        pause_time += lanc_seconds() - time;
	if (pause) {
	    nopauses = FALSE;
	    lastpause = j;

	    /* Compute limits for checking Ritz pair convergence. */
	    if (version == 2) {
		if (left_ngood + 2 > left_goodlim) {
		    left_goodlim = left_ngood + 2;
		}
	    }

	    /* Special case: need at least d Ritz vals on left. */
	    left_goodlim = max(left_goodlim, d);

	    /* Special case: can't find more than j total Ritz vals. */
	    if (left_goodlim > j) {
		left_goodlim = min(left_goodlim, j);
	    }

	    /* Find Ritz vals using faster of Sturm bisection or ql. */
            time = lanc_seconds();
	    if (inc_bis_safety) {
		bis_safety *= 10;
		inc_bis_safety = FALSE;
	    }
	    ritzval_flag = get_ritzvals(alpha, beta, j, Anorm, workj, ritz, d,
			 left_goodlim, 0, eigtol, bis_safety);
            ql_time += lanc_seconds() - time;

	    if (ritzval_flag != 0) {
                bail("ERROR: Lanczos_ext failed in computing eigenvalues of T.",1);
		/* ... we recover from this in lanczos_SO, but don't worry here. */ 
	    }

	    /* Scan for minimum evals of tridiagonal. */
            time = lanc_seconds();
	    scanmin(ritz, 1, j, &scanlist);
            scan_time += lanc_seconds() - time;

	    /* Compute Ritz pair bounds at left end. */
            time = lanc_seconds();
	    setvec(bj, 1, j, 0.0);
	    for (i = 1; i <= left_goodlim; i++) {
		Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
		if (Sres > Sres_max) {
		    Sres_max = Sres;
		}
		if (Sres > SRESTOL) {
		    inc_bis_safety = TRUE;
		}
		bj[i] = s[j] * beta[j];
	    }
            ritz_time += lanc_seconds() - time;

	    /* Show the portion of the spectrum checked for convergence. */
	    if (DEBUG_EVECS > 2) {
                time = lanc_seconds();
		printf("\nindex         Ritz vals            bji bounds\n");
		for (i = 1; i <= left_goodlim; i++) {
		    printf("  %3d", i);
		    doubleout(ritz[i], 1);
		    doubleout(bj[i], 1);
		    printf("\n");
		}
		printf("\n");
		curlnk = scanlist;
		while (curlnk != NULL) {
		    temp = curlnk->indx;
		    if ((temp > left_goodlim) && (temp < j)) {
			printf("  %3d", temp);
			doubleout(ritz[temp], 1);
			doubleout(bj[temp], 1);
			printf("\n");
		    }
		    curlnk = curlnk->pntr;
		}
		printf("                            -------------------\n");
		printf("                goodtol:    %19.16f\n\n", goodtol);
                debug_time += lanc_seconds() - time;
	    }

	    get_extval(alpha, beta, j, ritz[1], s, eigtol, beta[0], sigma, &extval,
		v, work1, work2);

	    /* check convergence of Ritz pairs */
            time = lanc_seconds();
	    converged = TRUE;
	    if (j < d)
		converged = FALSE;
	    else {
		curlnk = scanlist;
		while (curlnk != NULL) {
		    if (bj[curlnk->indx] > bji_tol) {
			converged = FALSE;
		    }
		    curlnk = curlnk->pntr;
		}
	    }
            scan_time += lanc_seconds() - time;

	    if (!converged) {
		ngood = 0;
		left_ngood = 0;	/* for setting left_goodlim on next loop */

		/* Compute converged Ritz pairs on left end */
                time = lanc_seconds();
		for (i = 1; i <= left_goodlim; i++) {
		    if (bj[i] <= goodtol) {
			ngood += 1;
			left_ngood += 1;
			if (ngood > maxngood) {
			    maxngood = ngood;
			    solist[ngood] = makeorthlnk_float();
			    (solist[ngood])->vec = mkvec_float(1, n);
			}
			(solist[ngood])->index = i;
			Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
			if (Sres > Sres_max) {
			    Sres_max = Sres;
			}
			if (Sres > SRESTOL) {
			    inc_bis_safety = TRUE;
			}
			setvec_float((solist[ngood])->vec, 1, n, 0.0);
			for (k = 1; k <= j; k++) {
			    scadd_float((solist[ngood])->vec, 1, n, s[k], q[k]);
			}
		    }
		}
                ritz_time += lanc_seconds() - time;

		if (DEBUG_EVECS > 2) {
                    time = lanc_seconds();
		    printf("  j %3d; goodlim lft %2d, rgt %2d; list ",
			   j, left_goodlim, 0);
		    solistout_float(solist, n, ngood, j);
                    printf("---------------------end of iteration---------------------\n\n");
                    debug_time += lanc_seconds() - time;
		}
	    }
	}
	j++;
    }
    j--;

    if (DEBUG_EVECS > 0) {
        time = lanc_seconds();
	if (maxj == 0) {
	    printf("Not extended eigenproblem -- calling ordinary eigensolver.\n");
	}
	else {
	    printf("  Lanczos_ext itns: %d\n",j);
	    printf("  eigenvalue: %g\n",ritz[1]);
	    printf("  extended eigenvalue: %g\n",extval);
	}
       debug_time += lanc_seconds() - time;
    }

    if (maxj != 0) {
        /* Compute (scaled) extended eigenvector. */
        time = lanc_seconds(); 
        setvec(y[1], 1, n, 0.0);
        for (k = 1; k <= j; k++) {
            scadd_mixed(y[1], 1, n, v[k], q[k]);
        }
        evec_time += lanc_seconds() - time;
        /* Note: assign() will scale this y vector back to x (since y = Dx) */ 

       /* Compute and check residual directly. Use the Ay = extval*y + Dg version of
          the problem for convenience. Note that u and v are used here as workspace */ 
        time = lanc_seconds(); 
        splarax(workn_double, A, n, y[1], vwsqrt, u_double);
        scadd(workn_double, 1, n, -extval, y[1]);
        scale_diag(gvec,1,n,vwsqrt);
        scadd(workn_double, 1, n, -1.0, gvec);
        resid = ch_norm(workn_double, 1, n);
        if (DEBUG_EVECS > 0) {
	    printf("  extended residual: %g\n",resid);
	    if (Output_File != NULL) {
	        fprintf(Output_File, "  extended residual: %g\n",resid);
	    }
	}
	if (WARNING_EVECS > 0 && resid > eigtol) {
	    printf("WARNING: Extended residual (%g) greater than tolerance (%g).\n",
		resid, eigtol);
	    if (Output_File != NULL) {
                fprintf(Output_File,
		    "WARNING: Extended residual (%g) greater than tolerance (%g).\n",
		    resid, eigtol);
	    }
	}
        debug_time += lanc_seconds() - time;
    } 


    /* free up memory */
    time = lanc_seconds();
    frvec_float(u, 1);
    frvec(u_double, 1);
    frvec_float(r, 1);
    frvec_float(workn, 1);
    frvec(workn_double, 1);
    frvec(Ares, 0);
    sfree(index);
    frvec(alpha, 1);
    frvec(beta, 0);
    frvec(ritz, 1);
    frvec(s, 1);
    frvec(bj, 1);
    frvec(workj, 0);
    for (i = 0; i <= j; i++) {
	frvec_float(q[i], 1);
    }
    sfree(q);
    while (scanlist != NULL) {
	curlnk = scanlist->pntr;
	sfree(scanlist);
	scanlist = curlnk;
    }
    for (i = 1; i <= maxngood; i++) {
	frvec_float((solist[i])->vec, 1);
	sfree(solist[i]);
    }
    sfree(solist);
    frvec(extvec, 1);
    frvec(v, 1);
    frvec(work1, 1);
    frvec(work2, 1);
    if (vwsqrt != NULL)
      frvec_float(vwsqrt_float, 1);
    
    init_time += lanc_seconds() - time;

    if (maxj == 0) return(1);  /* see note on beta[0] and maxj above */
    else return(0);
}