double checkeig_ext ( double *err, double *work, /* work vector of length n */ struct vtx_data **A, double *y, int n, double extval, double *vwsqrt, double *gvec, double eigtol, int warnings /* don't want to see warning messages in one of the contexts this is called */ ) { extern FILE *Output_File; /* output file or null */ extern int DEBUG_EVECS; /* print debugging output? */ extern int WARNING_EVECS; /* print warning messages? */ double resid; /* the extended eigen residual */ double ch_norm(); /* vector norm */ void splarax(); /* sparse matrix vector mult */ void scadd(); /* scaled vector add */ void scale_diag(); /* scale vector by another's elements */ void cpvec(); /* vector copy */ splarax(err, A, n, y, vwsqrt, work); scadd(err, 1, n, -extval, y); cpvec(work, 1, n, gvec); /* only need if going to re-use gvec */ scale_diag(work, 1, n, vwsqrt); scadd(err, 1, n, -1.0, work); resid = ch_norm(err, 1, n); if (DEBUG_EVECS > 0) { printf(" extended residual: %g\n", resid); if (Output_File != NULL) { fprintf(Output_File, " extended residual: %g\n", resid); } } if (warnings && WARNING_EVECS > 0 && resid > eigtol) { printf("WARNING: Extended residual (%g) greater than tolerance (%g).\n", resid, eigtol); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Extended residual (%g) greater than tolerance (%g).\n", resid, eigtol); } } return (resid); }
/* Check an eigenpair of A by direct multiplication. */ double checkeig (double *err, struct vtx_data **A, double *y, int n, double lambda, double *vwsqrt, double *work) { double resid; double normy; double ch_norm(); void splarax(), scadd(); splarax(err, A, n, y, vwsqrt, work); scadd(err, 1, n, -lambda, y); normy = ch_norm(y, 1, n); resid = ch_norm(err, 1, n) / normy; return (resid); }
void orthogvec ( double *vec1, /* vector to be orthogonalized */ int beg, int end, /* start and stop range for vector */ double *vec2 /* vector to be orthogonalized against */ ) { double alpha; double dot(); void scadd(); alpha = -dot(vec1, beg, end, vec2) / dot(vec2, beg, end, vec2); scadd(vec1, beg, end, alpha, vec2); }
void sorthog ( double *vec, /* vector to be orthogonalized */ int n, /* length of the columns of orth */ struct orthlink **solist, /* set of vecs to orth. against */ int ngood /* number of vecs in solist */ ) { double alpha; double *dir; double dot(); void scadd(); int i; for (i = 1; i <= ngood; i++) { dir = (solist[i])->vec; alpha = -dot(vec, 1, n, dir) / dot(dir, 1, n, dir); scadd(vec, 1, n, alpha, dir); } }
void coarsen ( /* Coarsen until nvtxs <= vmax, compute and uncoarsen. */ struct vtx_data **graph, /* array of vtx data for graph */ int nvtxs, /* number of vertices in graph */ int nedges, /* number of edges in graph */ int using_vwgts, /* are vertices weights being used? */ int using_ewgts, /* are edge weights being used? */ float *term_wgts[], /* terminal weights */ int igeom, /* dimension for geometric information */ float **coords, /* coordinates for vertices */ double **yvecs, /* eigenvectors returned */ int ndims, /* number of eigenvectors to calculate */ int solver_flag, /* which eigensolver to use */ int vmax, /* largest subgraph to stop coarsening */ double eigtol, /* tolerence in eigen calculation */ int nstep, /* number of coarsenings between RQI steps */ int step, /* current step number */ int give_up /* has coarsening bogged down? */ ) { extern FILE *Output_File; /* output file or null */ extern int DEBUG_COARSEN; /* debug flag for coarsening */ extern int PERTURB; /* was matrix perturbed in Lanczos? */ extern double COARSEN_RATIO_MIN; /* min vtx reduction for coarsening */ extern int COARSEN_VWGTS; /* use vertex weights while coarsening? */ extern int COARSEN_EWGTS; /* use edge weights while coarsening? */ extern double refine_time; /* time for RQI/Symmlq iterative refinement */ struct vtx_data **cgraph; /* array of vtx data for coarsened graph */ struct orthlink *orthlist; /* list of lower evecs to suppress */ struct orthlink *newlink; /* lower evec to suppress */ double *cyvecs[MAXDIMS + 1]; /* eigenvectors for subgraph */ double evals[MAXDIMS + 1]; /* eigenvalues returned */ double goal[MAXSETS]; /* needed for convergence mode = 1 */ double *r1, *r2, *work; /* space needed by symmlq/RQI */ double *v, *w, *x, *y; /* space needed by symmlq/RQI */ double *gvec; /* rhs vector in extended eigenproblem */ double evalest; /* eigenvalue estimate returned by RQI */ double maxdeg; /* maximum weighted degree of a vertex */ float **ccoords; /* coordinates for coarsened graph */ float *cterm_wgts[MAXSETS]; /* coarse graph terminal weights */ float *new_term_wgts[MAXSETS]; /* terminal weights for Bui's method*/ float **real_term_wgts; /* one of the above */ float *twptr; /* loops through term_wgts */ float *twptr_save; /* copy of twptr */ float *ctwptr; /* loops through cterm_wgts */ double *vwsqrt = NULL; /* square root of vertex weights */ double norm, alpha; /* values used for orthogonalization */ double initshift; /* initial shift for RQI */ double total_vwgt; /* sum of all the vertex weights */ double w1, w2; /* weights of two sets */ double sigma; /* norm of rhs in extended eigenproblem */ double term_tot; /* sum of all terminal weights */ int *space; /* room for assignment in Lanczos */ int *morespace; /* room for assignment in Lanczos */ int *v2cv; /* mapping from vertices to coarse vtxs */ int vwgt_max; /* largest vertex weight */ int oldperturb; /* saves PERTURB value */ int cnvtxs; /* number of vertices in coarsened graph */ int cnedges; /* number of edges in coarsened graph */ int nextstep; /* next step in RQI test */ int nsets; /* number of sets being created */ int i, j; /* loop counters */ double time; /* time marker */ double dot(), ch_normalize(), find_maxdeg(), seconds(); struct orthlink *makeorthlnk(); void makevwsqrt(), eigensolve(), coarsen1(), orthogvec(), rqi_ext(); void ch_interpolate(), orthog1(), rqi(), scadd(), free_graph(); if (DEBUG_COARSEN > 0) { printf("<Entering coarsen, step=%d, nvtxs=%d, nedges=%d, vmax=%d>\n", step, nvtxs, nedges, vmax); } nsets = 1 << ndims; /* Is problem small enough to solve? */ if (nvtxs <= vmax || give_up) { if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); } else vwsqrt = NULL; maxdeg = find_maxdeg(graph, nvtxs, using_ewgts, (float *) NULL); if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } eigensolve(graph, nvtxs, nedges, maxdeg, vwgt_max, vwsqrt, using_vwgts, using_ewgts, real_term_wgts, igeom, coords, yvecs, evals, 0, space, goal, solver_flag, FALSE, 0, ndims, 3, eigtol); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(real_term_wgts[1]); } sfree(space); if (vwsqrt != NULL) sfree(vwsqrt); return; } /* Otherwise I have to coarsen. */ if (coords != NULL) { ccoords = smalloc(igeom * sizeof(float *)); } else { ccoords = NULL; } coarsen1(graph, nvtxs, nedges, &cgraph, &cnvtxs, &cnedges, &v2cv, igeom, coords, ccoords, using_ewgts); /* If coarsening isn't working very well, give up and partition. */ give_up = FALSE; if (nvtxs * COARSEN_RATIO_MIN < cnvtxs && cnvtxs > vmax ) { printf("WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); printf(" Recursive coarsening being stopped prematurely.\n"); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Coarsening not making enough progress, nvtxs = %d, cnvtxs = %d.\n", nvtxs, cnvtxs); fprintf(Output_File, " Recursive coarsening being stopped prematurely.\n"); } give_up = TRUE; } /* Create space for subgraph yvecs. */ for (i = 1; i <= ndims; i++) { cyvecs[i] = smalloc((cnvtxs + 1) * sizeof(double)); } /* Make coarse version of terminal weights. */ if (term_wgts[1] != NULL) { twptr = smalloc((cnvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (i = (cnvtxs + 1) * (nsets - 1); i ; i--) { *twptr++ = 0; } twptr = twptr_save; for (j = 1; j < nsets; j++) { cterm_wgts[j] = twptr; twptr += cnvtxs + 1; } for (j = 1; j < nsets; j++) { ctwptr = cterm_wgts[j]; twptr = term_wgts[j]; for (i = 1; i < nvtxs; i++){ ctwptr[v2cv[i]] += twptr[i]; } } } else { cterm_wgts[1] = NULL; } /* Now recurse on coarse subgraph. */ nextstep = step + 1; coarsen(cgraph, cnvtxs, cnedges, COARSEN_VWGTS, COARSEN_EWGTS, cterm_wgts, igeom, ccoords, cyvecs, ndims, solver_flag, vmax, eigtol, nstep, nextstep, give_up); ch_interpolate(yvecs, cyvecs, ndims, graph, nvtxs, v2cv, using_ewgts); sfree(cterm_wgts[1]); sfree(v2cv); /* I need to do Rayleigh Quotient Iteration each nstep stages. */ time = seconds(); if (!(step % nstep)) { oldperturb = PERTURB; PERTURB = FALSE; /* Should I do some orthogonalization here against vwsqrt? */ if (using_vwgts) { vwsqrt = smalloc((nvtxs + 1) * sizeof(double)); makevwsqrt(vwsqrt, graph, nvtxs); for (i = 1; i <= ndims; i++) orthogvec(yvecs[i], 1, nvtxs, vwsqrt); } else for (i = 1; i <= ndims; i++) orthog1(yvecs[i], 1, nvtxs); /* Allocate space that will be needed in RQI. */ r1 = smalloc(7 * (nvtxs + 1) * sizeof(double)); r2 = &r1[nvtxs + 1]; v = &r1[2 * (nvtxs + 1)]; w = &r1[3 * (nvtxs + 1)]; x = &r1[4 * (nvtxs + 1)]; y = &r1[5 * (nvtxs + 1)]; work = &r1[6 * (nvtxs + 1)]; if (using_vwgts) { vwgt_max = 0; total_vwgt = 0; for (i = 1; i <= nvtxs; i++) { if (graph[i]->vwgt > vwgt_max) vwgt_max = graph[i]->vwgt; total_vwgt += graph[i]->vwgt; } } else { vwgt_max = 1; total_vwgt = nvtxs; } for (i = 0; i < nsets; i++) goal[i] = total_vwgt / nsets; space = smalloc((nvtxs + 1) * sizeof(int)); morespace = smalloc((nvtxs) * sizeof(int)); initshift = 0; orthlist = NULL; for (i = 1; i < ndims; i++) { ch_normalize(yvecs[i], 1, nvtxs); rqi(graph, yvecs, i, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); /* Now orthogonalize higher yvecs against this one. */ norm = dot(yvecs[i], 1, nvtxs, yvecs[i]); for (j = i + 1; j <= ndims; j++) { alpha = -dot(yvecs[j], 1, nvtxs, yvecs[i]) / norm; scadd(yvecs[j], 1, nvtxs, alpha, yvecs[i]); } /* Now prepare for next pass through loop. */ initshift = evalest; newlink = makeorthlnk(); newlink->vec = yvecs[i]; newlink->pntr = orthlist; orthlist = newlink; } ch_normalize(yvecs[ndims], 1, nvtxs); if (term_wgts[1] != NULL && ndims == 1) { /* Solve extended eigen problem */ /* If not coarsening ewgts, then need care with term_wgts. */ if (!using_ewgts && term_wgts[1] != NULL && step != 0) { twptr = smalloc((nvtxs + 1) * (nsets - 1) * sizeof(float)); twptr_save = twptr; for (j = 1; j < nsets; j++) { new_term_wgts[j] = twptr; twptr += nvtxs + 1; } for (j = 1; j < nsets; j++) { twptr = term_wgts[j]; ctwptr = new_term_wgts[j]; for (i = 1; i <= nvtxs; i++) { if (twptr[i] > .5) ctwptr[i] = 1; else if (twptr[i] < -.5) ctwptr[i] = -1; else ctwptr[i] = 0; } } real_term_wgts = new_term_wgts; } else { real_term_wgts = term_wgts; new_term_wgts[1] = NULL; } /* Following only works for bisection. */ w1 = goal[0]; w2 = goal[1]; sigma = sqrt(4*w1*w2/(w1+w2)); gvec = smalloc((nvtxs+1)*sizeof(double)); term_tot = sigma; /* Avoids lint warning for now. */ term_tot = 0; for (j=1; j<=nvtxs; j++) term_tot += (real_term_wgts[1])[j]; term_tot /= (w1+w2); if (using_vwgts) { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j]/graph[j]->vwgt - term_tot; } } else { for (j=1; j<=nvtxs; j++) { gvec[j] = (real_term_wgts[1])[j] - term_tot; } } rqi_ext(); sfree(gvec); if (real_term_wgts != term_wgts && new_term_wgts[1] != NULL) { sfree(new_term_wgts[1]); } } else { rqi(graph, yvecs, ndims, nvtxs, r1, r2, v, w, x, y, work, eigtol, initshift, &evalest, vwsqrt, orthlist, 0, nsets, space, morespace, 3, goal, vwgt_max, ndims); } refine_time += seconds() - time; /* Free the space allocated for RQI. */ sfree(morespace); sfree(space); while (orthlist != NULL) { newlink = orthlist->pntr; sfree(orthlist); orthlist = newlink; } sfree(r1); if (vwsqrt != NULL) sfree(vwsqrt); PERTURB = oldperturb; } if (DEBUG_COARSEN > 0) { printf(" Leaving coarsen, step=%d\n", step); } /* Free the space that was allocated. */ if (ccoords != NULL) { for (i = 0; i < igeom; i++) sfree(ccoords[i]); sfree(ccoords); } for (i = ndims; i > 0; i--) sfree(cyvecs[i]); free_graph(cgraph); }
bool power_iteration(double **square_mat, int n, int neigs, double **eigs, double *evals, bool initialize) { /* compute the 'neigs' top eigenvectors of 'square_mat' using power iteration */ int i, j; double *tmp_vec = N_GNEW(n, double); double *last_vec = N_GNEW(n, double); double *curr_vector; double len; double angle; double alpha; int iteration = 0; int largest_index; double largest_eval; int Max_iterations = 30 * n; double tol = 1 - p_iteration_threshold; if (neigs >= n) { neigs = n; } for (i = 0; i < neigs; i++) { curr_vector = eigs[i]; /* guess the i-th eigen vector */ choose: if (initialize) for (j = 0; j < n; j++) curr_vector[j] = rand() % 100; /* orthogonalize against higher eigenvectors */ for (j = 0; j < i; j++) { alpha = -dot(eigs[j], 0, n - 1, curr_vector); scadd(curr_vector, 0, n - 1, alpha, eigs[j]); } len = norm(curr_vector, 0, n - 1); if (len < 1e-10) { /* We have chosen a vector colinear with prvious ones */ goto choose; } vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector); iteration = 0; do { iteration++; cpvec(last_vec, 0, n - 1, curr_vector); right_mult_with_vector_d(square_mat, n, n, curr_vector, tmp_vec); cpvec(curr_vector, 0, n - 1, tmp_vec); /* orthogonalize against higher eigenvectors */ for (j = 0; j < i; j++) { alpha = -dot(eigs[j], 0, n - 1, curr_vector); scadd(curr_vector, 0, n - 1, alpha, eigs[j]); } len = norm(curr_vector, 0, n - 1); if (len < 1e-10 || iteration > Max_iterations) { /* We have reached the null space (e.vec. associated with e.val. 0) */ goto exit; } vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector); angle = dot(curr_vector, 0, n - 1, last_vec); } while (fabs(angle) < tol); evals[i] = angle * len; /* this is the Rayleigh quotient (up to errors due to orthogonalization): u*(A*u)/||A*u||)*||A*u||, where u=last_vec, and ||u||=1 */ } exit: for (; i < neigs; i++) { /* compute the smallest eigenvector, which are */ /* probably associated with eigenvalue 0 and for */ /* which power-iteration is dangerous */ curr_vector = eigs[i]; /* guess the i-th eigen vector */ for (j = 0; j < n; j++) curr_vector[j] = rand() % 100; /* orthogonalize against higher eigenvectors */ for (j = 0; j < i; j++) { alpha = -dot(eigs[j], 0, n - 1, curr_vector); scadd(curr_vector, 0, n - 1, alpha, eigs[j]); } len = norm(curr_vector, 0, n - 1); vecscale(curr_vector, 0, n - 1, 1.0 / len, curr_vector); evals[i] = 0; } /* sort vectors by their evals, for overcoming possible mis-convergence: */ for (i = 0; i < neigs - 1; i++) { largest_index = i; largest_eval = evals[largest_index]; for (j = i + 1; j < neigs; j++) { if (largest_eval < evals[j]) { largest_index = j; largest_eval = evals[largest_index]; } } if (largest_index != i) { /* exchange eigenvectors: */ cpvec(tmp_vec, 0, n - 1, eigs[i]); cpvec(eigs[i], 0, n - 1, eigs[largest_index]); cpvec(eigs[largest_index], 0, n - 1, tmp_vec); evals[largest_index] = evals[i]; evals[i] = largest_eval; } } free(tmp_vec); free(last_vec); return (iteration <= Max_iterations); }
/* Benchmark certain kernel operations */ void time_kernels ( struct vtx_data **A, /* matrix/graph being analyzed */ int n, /* number of rows/columns in matrix */ double *vwsqrt /* square roots of vertex weights */ ) { extern int DEBUG_PERTURB; /* debug flag for matrix perturbation */ extern int PERTURB; /* randomly perturb to break symmetry? */ extern int NPERTURB; /* number of edges to perturb */ extern int DEBUG_TRACE; /* trace main execution path */ extern double PERTURB_MAX; /* maximum size of perturbation */ int i, beg, end; double *dvec1, *dvec2, *dvec3; float *svec1, *svec2, *svec3, *vwsqrt_float; double norm_dvec, norm_svec; double dot_dvec, dot_svec; double time, time_dvec, time_svec; double diff; double factor, fac; float factor_float, fac_float; int loops; double min_time, target_time; double *mkvec(); float *mkvec_float(); void frvec(), frvec_float(); void vecran(); double ch_norm(), dot(); double norm_float(), dot_float(); double seconds(); void scadd(), scadd_float(), update(), update_float(); void splarax(), splarax_float(); void perturb_init(), perturb_clear(); if (DEBUG_TRACE > 0) { printf("<Entering time_kernels>\n"); } beg = 1; end = n; dvec1 = mkvec(beg, end); dvec2 = mkvec(beg, end); dvec3 = mkvec(beg - 1, end); svec1 = mkvec_float(beg, end); svec2 = mkvec_float(beg, end); svec3 = mkvec_float(beg - 1, end); if (vwsqrt == NULL) { vwsqrt_float = NULL; } else { vwsqrt_float = mkvec_float(beg - 1, end); for (i = beg - 1; i <= end; i++) { vwsqrt_float[i] = vwsqrt[i]; } } vecran(dvec1, beg, end); vecran(dvec2, beg, end); vecran(dvec3, beg, end); for (i = beg; i <= end; i++) { svec1[i] = dvec1[i]; svec2[i] = dvec2[i]; svec3[i] = dvec3[i]; } /* Set number of loops so that ch_norm() takes about one second. This should insulate against inaccurate timings on faster machines. */ loops = 1; time_dvec = 0; min_time = 0.5; target_time = 1.0; while (time_dvec < min_time) { time = seconds(); for (i = loops; i; i--) { norm_dvec = ch_norm(dvec1, beg, end); } time_dvec = seconds() - time; if (time_dvec < min_time) { loops = 10 * loops; } } loops = (target_time / time_dvec) * loops; if (loops < 1) loops = 1; printf(" Kernel benchmarking\n"); printf("Time (in seconds) for %d loops of each operation:\n\n", loops); printf("Routine Double Float Discrepancy Description\n"); printf("------- ------ ----- ----------- -----------\n"); /* Norm operation */ time = seconds(); for (i = loops; i; i--) { norm_dvec = ch_norm(dvec1, beg, end); } time_dvec = seconds() - time; time = seconds(); for (i = loops; i; i--) { norm_svec = norm_float(svec1, beg, end); } time_svec = seconds() - time; diff = norm_dvec - norm_svec; printf("norm %6.2f %6.2f %14.5e", time_dvec, time_svec, diff); printf(" 2 norm\n"); /* Dot operation */ time = seconds(); for (i = loops; i; i--) { dot_dvec = dot(dvec1, beg, end, dvec2); } time_dvec = seconds() - time; time = seconds(); for (i = loops; i; i--) { dot_svec = dot_float(svec1, beg, end, svec2); } time_svec = seconds() - time; diff = dot_dvec - dot_svec; printf("dot %6.2f %6.2f %14.5e", time_dvec, time_svec, diff); printf(" scalar product\n"); /* Scadd operation */ factor = 1.01; factor_float = factor; fac = factor; time = seconds(); for (i = loops; i; i--) { scadd(dvec1, beg, end, fac, dvec2); fac = -fac; /* to keep things in scale */ } time_dvec = seconds() - time; fac_float = factor_float; time = seconds(); for (i = loops; i; i--) { scadd_float(svec1, beg, end, fac_float, svec2); fac_float = -fac_float; /* to keep things in scale */ } time_svec = seconds() - time; diff = checkvec(dvec1, beg, end, svec1); printf("scadd %6.2f %6.2f %14.5e", time_dvec, time_svec, diff); printf(" vec1 <- vec1 + alpha*vec2\n"); /* Update operation */ time = seconds(); for (i = loops; i; i--) { update(dvec1, beg, end, dvec2, factor, dvec3); } time_dvec = seconds() - time; time = seconds(); for (i = loops; i; i--) { update_float(svec1, beg, end, svec2, factor_float, svec3); } time_svec = seconds() - time; diff = checkvec(dvec1, beg, end, svec1); printf("update %6.2f %6.2f %14.2g", time_dvec, time_svec, diff); printf(" vec1 <- vec2 + alpha*vec3\n"); /* splarax operation */ if (PERTURB) { if (NPERTURB > 0 && PERTURB_MAX > 0.0) { perturb_init(n); if (DEBUG_PERTURB > 0) { printf("Matrix being perturbed with scale %e\n", PERTURB_MAX); } } else if (DEBUG_PERTURB > 0) { printf("Matrix not being perturbed\n"); } } time = seconds(); for (i = loops; i; i--) { splarax(dvec1, A, n, dvec2, vwsqrt, dvec3); } time_dvec = seconds() - time; time = seconds(); for (i = loops; i; i--) { splarax_float(svec1, A, n, svec2, vwsqrt_float, svec3); } time_svec = seconds() - time; diff = checkvec(dvec1, beg, end, svec1); printf("splarax %6.2f %6.2f %14.5e", time_dvec, time_svec, diff); printf(" sparse matrix vector multiply\n"); if (PERTURB && NPERTURB > 0 && PERTURB_MAX > 0.0) { perturb_clear(); } printf("\n"); /* Free memory */ frvec(dvec1, 1); frvec(dvec2, 1); frvec(dvec3, 0); frvec_float(svec1, 1); frvec_float(svec2, 1); frvec_float(svec3, 0); if (vwsqrt_float != NULL) { frvec_float(vwsqrt_float, beg - 1); } }
void rqi ( struct vtx_data **A, /* matrix/graph being analyzed */ double **yvecs, /* eigenvectors to be refined */ int index, /* index of vector in yvecs to be refined */ int n, /* number of rows/columns in matrix */ double *r1, double *r2, double *v, double *w, double *x, double *y, double *work, /* work space for symmlq */ double tol, /* error tolerance in eigenpair */ double initshift, /* initial shift */ double *evalest, /* returned eigenvalue */ double *vwsqrt, /* square roots of vertex weights */ struct orthlink *orthlist, /* lower evecs to orthogonalize against */ int cube_or_mesh, /* 0 => hypercube, d => d-dimensional mesh */ int nsets, /* number of sets to divide into */ int *assignment, /* set number of each vtx (length n+1) */ int *active, /* space for nvtxs integers */ int mediantype, /* which partitioning strategy to use */ double *goal, /* desired set sizes */ int vwgt_max, /* largest vertex weight */ int ndims /* dimensionality of partition */ ) { extern int DEBUG_EVECS; /* debug flag for eigen computation */ extern int DEBUG_TRACE; /* trace main execution path */ extern int WARNING_EVECS; /* warning flag for eigen computation */ extern int RQI_CONVERGENCE_MODE; /* type of convergence monitoring to do */ int rqisteps; /* # rqi rqisteps */ double res; /* convergence quant for rqi */ double last_res; /* res on previous rqi step */ double macheps; /* machine precision calculated by symmlq */ double normxlim; /* a stopping criteria for symmlq */ double normx; /* norm of the solution vector */ int symmlqitns; /* # symmlq itns */ int inv_it_steps; /* intial steps of inverse iteration */ long itnmin; /* symmlq input */ double shift, rtol; /* symmlq input */ long precon, goodb, nout; /* symmlq input */ long checka, intlim; /* symmlq input */ double anorm, acond; /* symmlq output */ double rnorm, ynorm; /* symmlq output */ long istop, itn; /* symmlq output */ long long_n; /* copy of n for passing to symmlq */ int warning; /* warning on possible misconvergence */ double factor; /* ratio between previous res and new tol */ double minfactor; /* minimum acceptable value of factor */ int converged; /* has process converged yet? */ double *u; /* name of vector being refined */ int *old_assignment=NULL;/* previous assignment vector */ int *assgn_pntr; /* pntr to assignment vector */ int *old_assgn_pntr; /* pntr to previous assignment vector */ int assigndiff=0; /* discrepancies between old and new assignment */ int assigntol=0; /* tolerance on convergence of assignment vector */ int first; /* is this the first RQI step? */ int i; /* loop index */ double dot(), ch_norm(); int symmlq_(); void splarax(), scadd(), vecscale(), doubleout(), assign(), x2y(), strout(); if (DEBUG_TRACE > 0) { printf("<Entering rqi>\n"); } /* Initialize RQI loop */ u = yvecs[index]; splarax(y, A, n, u, vwsqrt, r1); shift = dot(u, 1, n, y); scadd(y, 1, n, -shift, u); res = ch_norm(y, 1, n); /* eigen-residual */ rqisteps = 0; /* a counter */ symmlqitns = 0; /* a counter */ /* Set invariant symmlq parameters */ precon = FALSE; /* FALSE until we figure out a good way */ goodb = TRUE; /* should be TRUE for this application */ nout = 0; /* set to 0 for no Symmlq output; 6 for lots */ checka = FALSE; /* if don't know by now, too bad */ intlim = n; /* set to enforce a maximum number of Symmlq itns */ itnmin = 0; /* set to enforce a minimum number of Symmlq itns */ long_n = n; /* type change for alint */ if (DEBUG_EVECS > 0) { printf("Using RQI/Symmlq refinement on graph with %d vertices.\n", n); } if (DEBUG_EVECS > 1) { printf(" step lambda est. Ares Symmlq its. istop factor delta\n"); printf(" 0"); doubleout(shift, 1); doubleout(res, 1); printf("\n"); } if (RQI_CONVERGENCE_MODE == 1) { assigntol = tol * n; old_assignment = smalloc((n + 1) * sizeof(int)); } /* Perform RQI */ inv_it_steps = 2; warning = FALSE; factor = 10; minfactor = factor / 2; first = TRUE; if (res < tol) converged = TRUE; else converged = FALSE; while (!converged) { if (res / tol < 1.2) { factor = max(factor / 2, minfactor); } rtol = res / factor; /* exit Symmlq if iterate is this large */ normxlim = 1.0 / rtol; if (rqisteps < inv_it_steps) { shift = initshift; } symmlq_(&long_n, &u[1], &r1[1], &r2[1], &v[1], &w[1], &x[1], &y[1], work, &checka, &goodb, &precon, &shift, &nout, &intlim, &rtol, &istop, &itn, &anorm, &acond, &rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist, &macheps, &normxlim, &itnmin); symmlqitns += itn; normx = ch_norm(x, 1, n); vecscale(u, 1, n, 1.0 / normx, x); splarax(y, A, n, u, vwsqrt, r1); shift = dot(u, 1, n, y); scadd(y, 1, n, -shift, u); last_res = res; res = ch_norm(y, 1, n); if (res > last_res) { warning = TRUE; } rqisteps++; if (res < tol) converged = TRUE; if (RQI_CONVERGENCE_MODE == 1 && !converged && ndims == 1) { if (first) { assign(A, yvecs, n, 1, cube_or_mesh, nsets, vwsqrt, assignment, active, mediantype, goal, vwgt_max); x2y(yvecs, ndims, n, vwsqrt); first = FALSE; assigndiff = n; /* dummy value for debug chart */ } else { /* copy assignment to old_assignment */ assgn_pntr = assignment; old_assgn_pntr = old_assignment; for (i = n + 1; i; i--) { *old_assgn_pntr++ = *assgn_pntr++; } assign(A, yvecs, n, ndims, cube_or_mesh, nsets, vwsqrt, assignment, active, mediantype, goal, vwgt_max); x2y(yvecs, ndims, n, vwsqrt); /* count differences in assignment */ assigndiff = 0; assgn_pntr = assignment; old_assgn_pntr = old_assignment; for (i = n + 1; i; i--) { if (*old_assgn_pntr++ != *assgn_pntr++) assigndiff++; } assigndiff = min(assigndiff, n - assigndiff); if (assigndiff <= assigntol) converged = TRUE; } } if (DEBUG_EVECS > 1) { printf(" %2d", rqisteps); doubleout(shift, 1); doubleout(res, 1); printf(" %3ld", itn); printf(" %ld", istop); printf(" %g", factor); if (RQI_CONVERGENCE_MODE == 1) printf(" %d\n", assigndiff); else printf("\n"); } } *evalest = shift; if (WARNING_EVECS > 0 && warning) { strout("WARNING: Residual convergence not monotonic; RQI may have misconverged.\n"); } if (DEBUG_EVECS > 0) { printf("Eval "); doubleout(*evalest, 1); printf(" RQI steps %d, Symmlq iterations %d.\n\n", rqisteps, symmlqitns); } if (RQI_CONVERGENCE_MODE == 1) { sfree(old_assignment); } }
void rescale_layout_polar(double *x_coords, double *y_coords, double *x_foci, double *y_foci, int num_foci, int n, int interval, double width, double height, double margin, double distortion) { // Polar distortion - main function int i; double minX, maxX, minY, maxY; double aspect_ratio; v_data *graph; double scaleX; double scale_ratio; width -= 2 * margin; height -= 2 * margin; // compute original aspect ratio minX = maxX = x_coords[0]; minY = maxY = y_coords[0]; for (i = 1; i < n; i++) { if (x_coords[i] < minX) minX = x_coords[i]; if (y_coords[i] < minY) minY = y_coords[i]; if (x_coords[i] > maxX) maxX = x_coords[i]; if (y_coords[i] > maxY) maxY = y_coords[i]; } aspect_ratio = (maxX - minX) / (maxY - minY); // construct mutual neighborhood graph graph = UG_graph(x_coords, y_coords, n, 0); if (num_foci == 1) { // accelerate execution of most common case rescale_layout_polarFocus(graph, n, x_coords, y_coords, x_foci[0], y_foci[0], interval, distortion); } else { // average-based rescale double *final_x_coords = N_NEW(n, double); double *final_y_coords = N_NEW(n, double); double *cp_x_coords = N_NEW(n, double); double *cp_y_coords = N_NEW(n, double); for (i = 0; i < n; i++) { final_x_coords[i] = final_y_coords[i] = 0; } for (i = 0; i < num_foci; i++) { cpvec(cp_x_coords, 0, n - 1, x_coords); cpvec(cp_y_coords, 0, n - 1, y_coords); rescale_layout_polarFocus(graph, n, cp_x_coords, cp_y_coords, x_foci[i], y_foci[i], interval, distortion); scadd(final_x_coords, 0, n - 1, 1.0 / num_foci, cp_x_coords); scadd(final_y_coords, 0, n - 1, 1.0 / num_foci, cp_y_coords); } cpvec(x_coords, 0, n - 1, final_x_coords); cpvec(y_coords, 0, n - 1, final_y_coords); free(final_x_coords); free(final_y_coords); free(cp_x_coords); free(cp_y_coords); } free(graph[0].edges); free(graph); minX = maxX = x_coords[0]; minY = maxY = y_coords[0]; for (i = 1; i < n; i++) { if (x_coords[i] < minX) minX = x_coords[i]; if (y_coords[i] < minY) minY = y_coords[i]; if (x_coords[i] > maxX) maxX = x_coords[i]; if (y_coords[i] > maxY) maxY = y_coords[i]; } // shift points: for (i = 0; i < n; i++) { x_coords[i] -= minX; y_coords[i] -= minY; } // rescale x_coords to maintain aspect ratio: scaleX = aspect_ratio * (maxY - minY) / (maxX - minX); for (i = 0; i < n; i++) { x_coords[i] *= scaleX; } // scale the layout to fit full drawing area: scale_ratio = MIN((width) / (aspect_ratio * (maxY - minY)), (height) / (maxY - minY)); for (i = 0; i < n; i++) { x_coords[i] *= scale_ratio; y_coords[i] *= scale_ratio; } for (i = 0; i < n; i++) { x_coords[i] += margin; y_coords[i] += margin; } }
int lanczos_ext_float ( struct vtx_data **A, /* sparse matrix in row linked list format */ int n, /* problem size */ int d, /* problem dimension = number of eigvecs to find */ double **y, /* columns of y are eigenvectors of A */ double eigtol, /* tolerance on eigenvectors */ double *vwsqrt, /* square roots of vertex weights */ double maxdeg, /* maximum degree of graph */ int version, /* flags which version of sel. orth. to use */ double *gvec, /* the rhs n-vector in the extended eigen problem */ double sigma /* specifies the norm constraint on extended eigenvector */ ) { extern FILE *Output_File; /* output file or null */ extern int LANCZOS_SO_INTERVAL; /* interval between orthogonalizations */ extern int LANCZOS_MAXITNS; /* maximum Lanczos iterations allowed */ extern int DEBUG_EVECS; /* print debugging output? */ extern int DEBUG_TRACE; /* trace main execution path */ extern int WARNING_EVECS; /* print warning messages? */ extern double BISECTION_SAFETY; /* safety factor for T bisection */ extern double SRESTOL; /* resid tol for T evec comp */ extern double DOUBLE_EPSILON; /* machine precision */ extern double DOUBLE_MAX; /* largest double value */ extern double splarax_time; /* time matvec */ extern double orthog_time; /* time orthogonalization work */ extern double evec_time; /* time to generate eigenvectors */ extern double ql_time; /* time tridiagonal eigenvalue work */ extern double blas_time; /* time for blas. linear algebra */ extern double init_time; /* time to allocate, intialize variables */ extern double scan_time; /* time for scanning eval and bound lists */ extern double debug_time; /* time for (some of) debug computations */ extern double ritz_time; /* time to generate ritz vectors */ extern double pause_time; /* time to compute whether to pause */ int i, j, k; /* indicies */ int maxj; /* maximum number of Lanczos iterations */ float *u, *r; /* Lanczos vectors */ double *u_double; /* double version of u */ double *alpha, *beta; /* the Lanczos scalars from each step */ double *ritz; /* copy of alpha for ql */ double *workj; /* work vector, e.g. copy of beta for ql */ float *workn; /* work vector, e.g. product Av for checkeig */ double *workn_double; /* work vector, e.g. product Av for checkeig */ double *s; /* eigenvector of T */ float **q; /* columns of q are Lanczos basis vectors */ double *bj; /* beta(j)*(last el. of corr. eigvec s of T) */ double bis_safety; /* real safety factor for T bisection */ double Sres; /* how well Tevec calculated eigvec s */ double Sres_max; /* Max value of Sres */ int inc_bis_safety; /* need to increase bisection safety */ double *Ares; /* how well Lanczos calc. eigpair lambda,y */ int *index; /* the Ritz index of an eigenpair */ struct orthlink_float **solist; /* vec. of structs with vecs. to orthog. against */ struct scanlink *scanlist; /* linked list of fields to do with min ritz vals */ struct scanlink *curlnk; /* for traversing the scanlist */ double bji_tol; /* tol on bji est. of eigen residual of A */ int converged; /* has the iteration converged? */ double goodtol; /* error tolerance for a good Ritz vector */ int ngood; /* total number of good Ritz pairs at current step */ int maxngood; /* biggest val of ngood through current step */ int left_ngood; /* number of good Ritz pairs on left end */ int lastpause; /* Most recent step with good ritz vecs */ int nopauses; /* Have there been any pauses? */ int interval; /* number of steps between pauses */ double time; /* Current clock time */ int left_goodlim; /* number of ritz pairs checked on left end */ double Anorm; /* Norm estimate of the Laplacian matrix */ int pausemode; /* which Lanczos pausing criterion to use */ int pause; /* whether to pause */ int temp; /* used to prevent redundant index computations */ double *extvec; /* n-vector solving the extended A eigenproblem */ double *v; /* j-vector solving the extended T eigenproblem */ double extval=0.0; /* computed extended eigenvalue (of both A and T) */ double *work1, *work2; /* work vectors */ double check; /* to check an orthogonality condition */ double numerical_zero; /* used for zero in presense of round-off */ int ritzval_flag; /* status flag for get_ritzvals() */ double resid; /* residual */ int memory_ok; /* TRUE until memory runs out */ float *vwsqrt_float = NULL; /* float version of vwsqrt */ struct orthlink_float *makeorthlnk_float(); /* makes space for new entry in orthog. set */ struct scanlink *mkscanlist(); /* init scan list for min ritz vecs */ double *mkvec(); /* allocates space for a vector */ float *mkvec_float(); /* allocates space for a vector */ float *mkvec_ret_float(); /* mkvec() which returns error code */ double dot_float(); /* standard dot product routine */ double ch_norm(); /* vector norm */ double norm_float(); /* vector norm */ double Tevec(); /* calc eigenvector of T by linear recurrence */ double lanc_seconds(); /* switcheable timer */ /* free allocated memory safely */ int lanpause_float(); /* figure when to pause Lanczos iteration */ int get_ritzvals(); /* compute eigenvalues of T */ void setvec(); /* initialize a vector */ void setvec_float(); /* initialize a vector */ void vecscale_float(); /* scale a vector */ void splarax(); /* matrix vector multiply */ void splarax_float(); /* matrix vector multiply */ void update_float(); /* add scalar multiple of a vector to another */ void sorthog_float(); /* orthogonalize vector against list of others */ void bail(); /* our exit routine */ void scanmin(); /* store small values of vector in linked list */ void frvec(); /* free vector */ void frvec_float(); /* free vector */ void scadd(); /* add scalar multiple of vector to another */ void scadd_float(); /* add scalar multiple of vector to another */ void scadd_mixed(); /* add scalar multiple of vector to another */ void orthog1_float(); /* efficiently orthog. against vector of ones */ void solistout_float(); /* print out orthogonalization list */ void doubleout(); /* print a double precision number */ void orthogvec_float(); /* orthogonalize one vector against another */ void double_to_float(); /* copy a double vector to a float vector */ void get_extval(); /* find extended Ritz values */ void scale_diag(); /* scale vector by diagonal matrix */ void scale_diag_float(); /* scale vector by diagonal matrix */ void strout(); /* print string to screen and file */ if (DEBUG_TRACE > 0) { printf("<Entering lanczos_ext_float>\n"); } if (DEBUG_EVECS > 0) { printf("Selective orthogonalization Lanczos for extended eigenproblem, matrix size = %d.\n", n); } /* Initialize time. */ time = lanc_seconds(); if (d != 1) { bail("ERROR: Extended Lanczos only available for bisection.",1); /* ... something must be wrong upstream. */ } if (n < d + 1) { bail("ERROR: System too small for number of eigenvalues requested.",1); /* ... d+1 since don't use zero eigenvalue pair */ } /* Allocate space. */ maxj = LANCZOS_MAXITNS; u = mkvec_float(1, n); u_double = mkvec(1, n); r = mkvec_float(1, n); workn = mkvec_float(1, n); workn_double = mkvec(1, n); Ares = mkvec(0, d); index = smalloc((d + 1) * sizeof(int)); alpha = mkvec(1, maxj); beta = mkvec(0, maxj); ritz = mkvec(1, maxj); s = mkvec(1, maxj); bj = mkvec(1, maxj); workj = mkvec(0, maxj); q = smalloc((maxj + 1) * sizeof(float *)); solist = smalloc((maxj + 1) * sizeof(struct orthlink_float *)); scanlist = mkscanlist(d); extvec = mkvec(1, n); v = mkvec(1, maxj); work1 = mkvec(1, maxj); work2 = mkvec(1, maxj); /* Set some constants governing orthogonalization */ ngood = 0; maxngood = 0; bji_tol = eigtol; Anorm = 2 * maxdeg; /* Gershgorin estimate for ||A|| */ goodtol = Anorm * sqrt(DOUBLE_EPSILON); /* Parlett & Scott's bound, p.224 */ interval = 2 + (int) min(LANCZOS_SO_INTERVAL - 2, n / (2 * LANCZOS_SO_INTERVAL)); bis_safety = BISECTION_SAFETY; numerical_zero = 1.0e-6; if (DEBUG_EVECS > 0) { printf(" maxdeg %g\n", maxdeg); printf(" goodtol %g\n", goodtol); printf(" interval %d\n", interval); printf(" maxj %d\n", maxj); } /* Make a float copy of vwsqrt */ if (vwsqrt != NULL) { vwsqrt_float = mkvec_float(0,n); double_to_float(vwsqrt_float,1,n,vwsqrt); } /* Initialize space. */ double_to_float(r,1,n,gvec); if (vwsqrt_float != NULL) { scale_diag_float(r,1,n,vwsqrt_float); } check = norm_float(r,1,n); if (vwsqrt_float == NULL) { orthog1_float(r, 1, n); } else { orthogvec_float(r, 1, n, vwsqrt_float); } check = fabs(check - norm_float(r,1,n)); if (check > 10*numerical_zero && WARNING_EVECS > 0) { strout("WARNING: In terminal propagation, rhs should have no component in the"); printf(" nullspace of the Laplacian, so check val %g should be zero.\n", check); if (Output_File != NULL) { fprintf(Output_File, " nullspace of the Laplacian, so check val %g should be zero.\n", check); } } beta[0] = norm_float(r, 1, n); q[0] = mkvec_float(1, n); setvec_float(q[0], 1, n, 0.0); setvec(bj, 1, maxj, DOUBLE_MAX); if (beta[0] < numerical_zero) { /* The rhs vector, Dg, of the transformed problem is numerically zero or is in the null space of the Laplacian, so this is not a well posed extended eigenproblem. Set maxj to zero to force a quick exit but still clean-up memory and return(1) to indicate to eigensolve that it should call the default eigensolver routine for the standard eigenproblem. */ maxj = 0; } /* Main Lanczos loop. */ j = 1; lastpause = 0; pausemode = 1; left_ngood = 0; left_goodlim = 0; converged = FALSE; Sres_max = 0.0; inc_bis_safety = FALSE; nopauses = TRUE; memory_ok = TRUE; init_time += lanc_seconds() - time; while ((j <= maxj) && (!converged) && memory_ok) { time = lanc_seconds(); /* Allocate next Lanczos vector. If fail, back up to last pause. */ q[j] = mkvec_ret_float(1, n); if (q[j] == NULL) { memory_ok = FALSE; if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) { strout("WARNING: Lanczos_ext out of memory; computing best approximation available.\n"); } if (nopauses) { bail("ERROR: Sorry, can't salvage Lanczos_ext.",1); /* ... save yourselves, men. */ } for (i = lastpause+1; i <= j-1; i++) { frvec_float(q[i], 1); } j = lastpause; } /* Basic Lanczos iteration */ vecscale_float(q[j], 1, n, (float)(1.0 / beta[j - 1]), r); blas_time += lanc_seconds() - time; time = lanc_seconds(); splarax_float(u, A, n, q[j], vwsqrt_float, workn); splarax_time += lanc_seconds() - time; time = lanc_seconds(); update_float(r, 1, n, u, (float)(-beta[j - 1]), q[j - 1]); alpha[j] = dot_float(r, 1, n, q[j]); update_float(r, 1, n, r, (float)(-alpha[j]), q[j]); blas_time += lanc_seconds() - time; /* Selective orthogonalization */ time = lanc_seconds(); if (vwsqrt_float == NULL) { orthog1_float(r, 1, n); } else { orthogvec_float(r, 1, n, vwsqrt_float); } if ((j == (lastpause + 1)) || (j == (lastpause + 2))) { sorthog_float(r, n, solist, ngood); } orthog_time += lanc_seconds() - time; beta[j] = norm_float(r, 1, n); time = lanc_seconds(); pause = lanpause_float(j, lastpause, interval, q, n, &pausemode, version, beta[j]); pause_time += lanc_seconds() - time; if (pause) { nopauses = FALSE; lastpause = j; /* Compute limits for checking Ritz pair convergence. */ if (version == 2) { if (left_ngood + 2 > left_goodlim) { left_goodlim = left_ngood + 2; } } /* Special case: need at least d Ritz vals on left. */ left_goodlim = max(left_goodlim, d); /* Special case: can't find more than j total Ritz vals. */ if (left_goodlim > j) { left_goodlim = min(left_goodlim, j); } /* Find Ritz vals using faster of Sturm bisection or ql. */ time = lanc_seconds(); if (inc_bis_safety) { bis_safety *= 10; inc_bis_safety = FALSE; } ritzval_flag = get_ritzvals(alpha, beta, j, Anorm, workj, ritz, d, left_goodlim, 0, eigtol, bis_safety); ql_time += lanc_seconds() - time; if (ritzval_flag != 0) { bail("ERROR: Lanczos_ext failed in computing eigenvalues of T.",1); /* ... we recover from this in lanczos_SO, but don't worry here. */ } /* Scan for minimum evals of tridiagonal. */ time = lanc_seconds(); scanmin(ritz, 1, j, &scanlist); scan_time += lanc_seconds() - time; /* Compute Ritz pair bounds at left end. */ time = lanc_seconds(); setvec(bj, 1, j, 0.0); for (i = 1; i <= left_goodlim; i++) { Sres = Tevec(alpha, beta - 1, j, ritz[i], s); if (Sres > Sres_max) { Sres_max = Sres; } if (Sres > SRESTOL) { inc_bis_safety = TRUE; } bj[i] = s[j] * beta[j]; } ritz_time += lanc_seconds() - time; /* Show the portion of the spectrum checked for convergence. */ if (DEBUG_EVECS > 2) { time = lanc_seconds(); printf("\nindex Ritz vals bji bounds\n"); for (i = 1; i <= left_goodlim; i++) { printf(" %3d", i); doubleout(ritz[i], 1); doubleout(bj[i], 1); printf("\n"); } printf("\n"); curlnk = scanlist; while (curlnk != NULL) { temp = curlnk->indx; if ((temp > left_goodlim) && (temp < j)) { printf(" %3d", temp); doubleout(ritz[temp], 1); doubleout(bj[temp], 1); printf("\n"); } curlnk = curlnk->pntr; } printf(" -------------------\n"); printf(" goodtol: %19.16f\n\n", goodtol); debug_time += lanc_seconds() - time; } get_extval(alpha, beta, j, ritz[1], s, eigtol, beta[0], sigma, &extval, v, work1, work2); /* check convergence of Ritz pairs */ time = lanc_seconds(); converged = TRUE; if (j < d) converged = FALSE; else { curlnk = scanlist; while (curlnk != NULL) { if (bj[curlnk->indx] > bji_tol) { converged = FALSE; } curlnk = curlnk->pntr; } } scan_time += lanc_seconds() - time; if (!converged) { ngood = 0; left_ngood = 0; /* for setting left_goodlim on next loop */ /* Compute converged Ritz pairs on left end */ time = lanc_seconds(); for (i = 1; i <= left_goodlim; i++) { if (bj[i] <= goodtol) { ngood += 1; left_ngood += 1; if (ngood > maxngood) { maxngood = ngood; solist[ngood] = makeorthlnk_float(); (solist[ngood])->vec = mkvec_float(1, n); } (solist[ngood])->index = i; Sres = Tevec(alpha, beta - 1, j, ritz[i], s); if (Sres > Sres_max) { Sres_max = Sres; } if (Sres > SRESTOL) { inc_bis_safety = TRUE; } setvec_float((solist[ngood])->vec, 1, n, 0.0); for (k = 1; k <= j; k++) { scadd_float((solist[ngood])->vec, 1, n, s[k], q[k]); } } } ritz_time += lanc_seconds() - time; if (DEBUG_EVECS > 2) { time = lanc_seconds(); printf(" j %3d; goodlim lft %2d, rgt %2d; list ", j, left_goodlim, 0); solistout_float(solist, n, ngood, j); printf("---------------------end of iteration---------------------\n\n"); debug_time += lanc_seconds() - time; } } } j++; } j--; if (DEBUG_EVECS > 0) { time = lanc_seconds(); if (maxj == 0) { printf("Not extended eigenproblem -- calling ordinary eigensolver.\n"); } else { printf(" Lanczos_ext itns: %d\n",j); printf(" eigenvalue: %g\n",ritz[1]); printf(" extended eigenvalue: %g\n",extval); } debug_time += lanc_seconds() - time; } if (maxj != 0) { /* Compute (scaled) extended eigenvector. */ time = lanc_seconds(); setvec(y[1], 1, n, 0.0); for (k = 1; k <= j; k++) { scadd_mixed(y[1], 1, n, v[k], q[k]); } evec_time += lanc_seconds() - time; /* Note: assign() will scale this y vector back to x (since y = Dx) */ /* Compute and check residual directly. Use the Ay = extval*y + Dg version of the problem for convenience. Note that u and v are used here as workspace */ time = lanc_seconds(); splarax(workn_double, A, n, y[1], vwsqrt, u_double); scadd(workn_double, 1, n, -extval, y[1]); scale_diag(gvec,1,n,vwsqrt); scadd(workn_double, 1, n, -1.0, gvec); resid = ch_norm(workn_double, 1, n); if (DEBUG_EVECS > 0) { printf(" extended residual: %g\n",resid); if (Output_File != NULL) { fprintf(Output_File, " extended residual: %g\n",resid); } } if (WARNING_EVECS > 0 && resid > eigtol) { printf("WARNING: Extended residual (%g) greater than tolerance (%g).\n", resid, eigtol); if (Output_File != NULL) { fprintf(Output_File, "WARNING: Extended residual (%g) greater than tolerance (%g).\n", resid, eigtol); } } debug_time += lanc_seconds() - time; } /* free up memory */ time = lanc_seconds(); frvec_float(u, 1); frvec(u_double, 1); frvec_float(r, 1); frvec_float(workn, 1); frvec(workn_double, 1); frvec(Ares, 0); sfree(index); frvec(alpha, 1); frvec(beta, 0); frvec(ritz, 1); frvec(s, 1); frvec(bj, 1); frvec(workj, 0); for (i = 0; i <= j; i++) { frvec_float(q[i], 1); } sfree(q); while (scanlist != NULL) { curlnk = scanlist->pntr; sfree(scanlist); scanlist = curlnk; } for (i = 1; i <= maxngood; i++) { frvec_float((solist[i])->vec, 1); sfree(solist[i]); } sfree(solist); frvec(extvec, 1); frvec(v, 1); frvec(work1, 1); frvec(work2, 1); if (vwsqrt != NULL) frvec_float(vwsqrt_float, 1); init_time += lanc_seconds() - time; if (maxj == 0) return(1); /* see note on beta[0] and maxj above */ else return(0); }