Esempio n. 1
0
File: numrec.c Progetto: nasa/QuIP
static void float_sort_svd_eigenvectors(Data_Obj *u_dp, Data_Obj *w_dp, Data_Obj *v_dp)
{
	//SORT_EIGENVECTORS(float)
	dimension_t i,n;
	u_long *index_data;

	n = OBJ_COLS(w_dp);

	index_dp = mkvec("svd_tmp_indices",n,1,PREC_UDI);
	if( index_dp == NULL ){
		NWARN("Unable to create index object for sorting SVD eigenvalues");
		return;
	}

	sort_indices(index_dp,w_dp);
	/* sorting is done from smallest to largest */

	j=0;
	index_data = OBJ_DATA_PTR(index_dp);

	/* We should only have to have 1 column of storage to permute things,
	 * but life is simplified if we copy the data...
	 */

	for(i=n-1;i>=0;i--){
		k= *(index_data+i);
	}
}
Esempio n. 2
0
/* return the roots of pol in nf */
GEN
nfroots(GEN nf,GEN pol)
{
  pari_sp av = avma;
  GEN A,g, T;
  long d;

  if (!nf) return nfrootsQ(pol);

  nf = checknf(nf); T = gel(nf,1);
  if (typ(pol) != t_POL) pari_err(notpoler,"nfroots");
  if (varncmp(varn(pol), varn(T)) >= 0)
    pari_err(talker,"polynomial variable must have highest priority in nfroots");
  d = degpol(pol);
  if (d == 0) return cgetg(1,t_VEC);
  if (d == 1)
  {
    A = gneg_i(gdiv(gel(pol,2),gel(pol,3)));
    return gerepilecopy(av, mkvec( basistoalg(nf,A) ));
  }
  A = fix_relative_pol(nf,pol,0);
  A = Q_primpart( lift_intern(A) );
  if (DEBUGLEVEL>3) fprintferr("test if polynomial is square-free\n");
  g = nfgcd(A, derivpol(A), T, gel(nf,4));

  if (degpol(g))
  { /* not squarefree */
    g = QXQX_normalize(g, T);
    A = RgXQX_div(A,g,T);
  }
  A = QXQX_normalize(A, T);
  A = Q_primpart(A);
  A = nfsqff(nf,A,1);
  A = RgXQV_to_mod(A, T);
  return gerepileupto(av, gen_sort(A, 0, cmp_pol));
}
Esempio n. 3
0
GEN
F2xq_ellgens(GEN a2, GEN a6, GEN ch, GEN D, GEN m, GEN T)
{
  GEN P;
  pari_sp av = avma;
  struct _F2xqE e;
  e.a2=a2; e.a6=a6; e.T=T;
  switch(lg(D)-1)
  {
  case 0:
    return cgetg(1,t_VEC);
  case 1:
    P = gen_gener(gel(D,1), (void*)&e, &F2xqE_group);
    P = mkvec(F2xqE_changepoint(P, ch, T));
    break;
  default:
    P = gen_ellgens(gel(D,1), gel(D,2), m, (void*)&e, &F2xqE_group,
                                                      _F2xqE_pairorder);
    gel(P,1) = F2xqE_changepoint(gel(P,1), ch, T);
    gel(P,2) = F2xqE_changepoint(gel(P,2), ch, T);
    break;
  }
  return gerepilecopy(av, P);
}
Esempio n. 4
0
/* Finds eigenvector s of T and returns residual norm. */
double 
Tevec (
    double *alpha,		/* vector of Lanczos scalars */
    double *beta,			/* vector of Lanczos scalars */
    int j,			/* number of Lanczos iterations taken */
    double ritz,			/* approximate eigenvalue  of T */
    double *s			/* approximate eigenvector of T */
)
{
    extern double SRESTOL;      /* limit on relative residual tol for evec of T */
    extern double DOUBLE_MAX;	/* maximum double precision value */
    int       i;		/* index */
    double    residual=0.0;	/* how well recurrence gives eigenvector */
    double    temp;		/* used to compute residual */
    double   *work;		/* temporary work vector allocated within if used */
    double    w[MAXDIMS + 1];	/* holds eigenvalue for tinvit */
    long      index[MAXDIMS +1];/* index vector for tinvit */
    long      ierr;		/* error flag for tinvit */
    long      nevals;		/* number of evals sought */
    long      long_j;		/* long copy of j for tinvit interface */
    double    hurdle;		/* hurdle for local maximum in recurrence */
    double    prev_resid;	/* stores residual from previous computation */

    int       tinvit_();	/* eispack's tinvit for evecs of symmetric T */
    double   *mkvec();		/* allocates double vectors */
    void      frvec();		/* frees double vectors */
    double    bidir();		/* bidirectional recurrence for evec of T */
    void      cpvec();		/* vector copy routine */

	s[1] = 1.0;

	if (j == 1) {
	    residual = fabs(alpha[1] - ritz);
	}

	if (j >= 2) { 
	    /*Bidirectional recurrence - corrected and modified from Parlett and Reid, 
              "Tracking the Progress of the Lanczos Algorithm ..., IMA JNA 1, 1981 */
	    hurdle = 1.0;
	    residual = bidir(alpha,beta,j,ritz,s,hurdle);
	}

	if (residual > SRESTOL) {
	    /* Try again with Eispack's Tinvit iteration */
	    SRES_SWITCHES++;
	    index[1] = 1;
	    work = mkvec(1, 7*j);	/* lump things to save mallocs */
	    w[1] = ritz;
	    work[1] = 0;
	    for (i = 2; i <= j; i++) {
	        work[i] = beta[i] * beta[i];
	    }
	    nevals = 1;
	    long_j = j;

	    /* save the previously computed evec in case it's better */ 
	    cpvec(&(work[6*j]),1,j,s);
	    prev_resid = residual;

	    tinvit_(&long_j, &long_j, &(alpha[1]), &(beta[1]), &(work[1]), &nevals,
		&(w[1]), &(index[1]), &(s[1]), &ierr, &(work[j+1]), &(work[(2*j)+1]), 
	    	&(work[(3*j)+1]), &(work[(4*j)+1]), &(work[(5*j)+1]));

	    /* fix up sign if needed */
	    if (s[j] < 0) {
	        for(i=1; i<=j; i++) {
		    s[i] = - s[i];
	        }
	    }

	    if (ierr != 0) {
	        residual = DOUBLE_MAX;
	        /* ... don't want to use evec since it is set to zero */
	    }

	    else {
	        temp = (alpha[1] - ritz) * s[1] + beta[2] * s[2];
	        residual = temp * temp;
	        for (i = 2; i < j; i++) {
		    temp = beta[i] * s[i - 1] + (alpha[i] - ritz) * s[i] 
                         + beta[i + 1] * s[i + 1];
		    residual += temp * temp;
	        }
	        temp = beta[j] * s[j - 1] + (alpha[j] - ritz) * s[j];
	        residual += temp * temp;
	        residual = sqrt(residual);
	        /* tinvit normalizes, so we don't need to. */
	    }

	    /* restore previous evec if it had a better residual */
	    if (prev_resid < residual) {
	        residual = prev_resid;
	        cpvec(s,1,j,&(work[6*j]));
	    	SRES_SWITCHES++; /* count since switching back as well */
	    }

	    frvec(work, 1);
	}

    return (residual);
}
Esempio n. 5
0
static double dist(pnt a, pnt b) {
    vec x;
    mkvec(a, b, x);
    return sqrt(dot(x,x));
}
Esempio n. 6
0
static int addlayer(double h,
	int *pnV, double *vertex,
	int nE, int *edge, int *edgematerial,
	int *pnF, int *face, int *facematerial,
	int *pnT, int *tetra, int *tetramaterial,
	int *pnFa, int *facea, int *facemata,
	int nnV, int nnF, int nnT
) {
    int nV = *pnV,  nF = *pnF,  nT = *pnT,  nFa = *pnFa;
    int removed = 0, i, j, k;
    vec x, a, b;
    double *mass;
    double m;
    int v1, v2, d, layers = ceil(h/size);
    int nFF = nF;
    h = h / layers;
    if ((1+layers)*nV+1+layers > nnV)  {
	printf("Please increase max nV\n");
	return -1;
    }
    mass = (double*)malloc(sizeof(double)*(nV+1));
    for (i=1; i<=layers*nV; i++)  vertex[3*(nV+i)+0] = 0.0,  vertex[3*(nV+i)+1] = 0.0,  vertex[3*(nV+i)+2] = 0.0;
    for (i=1; i<=nV; i++)  mass[i] = 0.0;
    for (i=0; i<nF; i++) {
	if (facematerial[i] > 10) {
	    mkvec(vertex+3*face[3*i+2], vertex+3*face[3*i+1], a);
	    mkvec(vertex+3*face[3*i+2], vertex+3*face[3*i+0], b);
	    m = vp(a, b, x);
	    for (j=0; j<3; j++)  addvec(1.0, vertex+3*(nV+face[3*i+j]), 1.0, x),  mass[face[3*i+j]] += m;
	}
    }
    for (i=1; i<=nV; i++) {
	if (mass[i] > 0.0) {
	    for (k=layers; k>1; k--)  addvec(0.0, vertex+3*(k*nV+i), 1.0, vertex+3*(nV+i));
	    for (k=layers; k>0; k--)  addvec(k*h/mass[i], vertex+3*(k*nV+i), 1.0, vertex+3*i);
	}
    }
    for (i=0; i<nFF; i++) {
	if (facematerial[i] > 10) {
	    j = layers;
	    for (k=0; k<j; k++)  addprism(&nT, tetra, tetramaterial, k*nV+face[3*i+0], k*nV+face[3*i+1], k*nV+face[3*i+2], nV, 7);
	    facea[3*nFa+0] = j*nV+face[3*i+2],  facea[3*nFa+1] = j*nV+face[3*i+1],  facea[3*nFa+2] = j*nV+face[3*i+0],  facemata[nFa] = facematerial[i],  nFa++;
	    removed++;
	    nF--, nFF--;
	    if (nFF>0) {
		face[3*i+0] = face[3*nFF+0];
		face[3*i+1] = face[3*nFF+1];
		face[3*i+2] = face[3*nFF+2];
		facematerial[i] = facematerial[nFF];
		face[3*nFF+0] = face[3*nF+0];
		face[3*nFF+1] = face[3*nF+1];
		face[3*nFF+2] = face[3*nF+2];
		facematerial[nFF] = facematerial[nF];
	    }
	    i--;
	}
    }
    for (i=0; i<nE; i++) {
	v1 = (edge[2*i+0] < edge[2*i+1]) ? edge[2*i+0] : edge[2*i+1];
	v2 = (edge[2*i+0] > edge[2*i+1]) ? edge[2*i+0] : edge[2*i+1];
	d = (edge[2*i+0] < edge[2*i+1]) ? 1 : 0;
	if (edgematerial[i])  for (k=0; k<layers; k++) {
	    facea[3*nFa+d] = k*nV+v1,  facea[3*nFa+1-d] = (k+1)*nV+v1,  facea[3*nFa+2] = k*nV+v2,  facemata[nFa] = edgematerial[i],  nFa++;
	    facea[3*nFa+d] = (k+1)*nV+v2,  facea[3*nFa+1-d] = k*nV+v2,  facea[3*nFa+2] = (k+1)*nV+v1,  facemata[nFa] = edgematerial[i],  nFa++;
	}
	edge[2*i+0] += layers*nV,  edge[2*i+1] += layers*nV;
    }
    nV += layers*nV;
    free(mass);
    *pnV = nV;
    *pnF = nF;
    *pnT = nT;
    *pnFa = nFa;
    return removed;
    (void) nnF,  (void) nnT;
    (void) dist;
}
Esempio n. 7
0
/* Benchmark certain kernel operations */
void 
time_kernels (
    struct vtx_data **A,		/* matrix/graph being analyzed */
    int n,			/* number of rows/columns in matrix */
    double *vwsqrt		/* square roots of vertex weights */
)
{
    extern int DEBUG_PERTURB;	/* debug flag for matrix perturbation */
    extern int PERTURB;		/* randomly perturb to break symmetry? */
    extern int NPERTURB;	/* number of edges to perturb */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern double PERTURB_MAX;	/* maximum size of perturbation */
    int       i, beg, end;
    double   *dvec1, *dvec2, *dvec3;
    float    *svec1, *svec2, *svec3, *vwsqrt_float;
    double    norm_dvec, norm_svec;
    double    dot_dvec, dot_svec;
    double    time, time_dvec, time_svec;
    double    diff;
    double    factor, fac;
    float     factor_float, fac_float;
    int       loops;
    double    min_time, target_time;

    double   *mkvec();
    float    *mkvec_float();
    void      frvec(), frvec_float();
    void      vecran();
    double    ch_norm(), dot();
    double    norm_float(), dot_float();
    double    seconds();
    void      scadd(), scadd_float(), update(), update_float();
    void      splarax(), splarax_float();
    void      perturb_init(), perturb_clear();

    if (DEBUG_TRACE > 0) {
	printf("<Entering time_kernels>\n");
    }

    beg = 1;
    end = n;

    dvec1 = mkvec(beg, end);
    dvec2 = mkvec(beg, end);
    dvec3 = mkvec(beg - 1, end);
    svec1 = mkvec_float(beg, end);
    svec2 = mkvec_float(beg, end);
    svec3 = mkvec_float(beg - 1, end);

    if (vwsqrt == NULL) {
	vwsqrt_float = NULL;
    }
    else {
        vwsqrt_float = mkvec_float(beg - 1, end);
	for (i = beg - 1; i <= end; i++) {
	    vwsqrt_float[i] = vwsqrt[i];
	}
    }

    vecran(dvec1, beg, end);
    vecran(dvec2, beg, end);
    vecran(dvec3, beg, end);
    for (i = beg; i <= end; i++) {
	svec1[i] = dvec1[i];
	svec2[i] = dvec2[i];
	svec3[i] = dvec3[i];
    }

    /* Set number of loops so that ch_norm() takes about one second. This should
       insulate against inaccurate timings on faster machines. */

    loops = 1;
    time_dvec = 0;
    min_time = 0.5;
    target_time = 1.0;
    while (time_dvec < min_time) {
	time = seconds();
	for (i = loops; i; i--) {
	    norm_dvec = ch_norm(dvec1, beg, end);
	}
	time_dvec = seconds() - time;
	if (time_dvec < min_time) {
	    loops = 10 * loops;
	}
    }
    loops = (target_time / time_dvec) * loops;
    if (loops < 1)
	loops = 1;

    printf("                Kernel benchmarking\n");
    printf("Time (in seconds) for %d loops of each operation:\n\n", loops);

    printf("Routine      Double     Float      Discrepancy      Description\n");
    printf("-------      ------     -----      -----------      -----------\n");


    /* Norm operation */
    time = seconds();
    for (i = loops; i; i--) {
	norm_dvec = ch_norm(dvec1, beg, end);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	norm_svec = norm_float(svec1, beg, end);
    }
    time_svec = seconds() - time;

    diff = norm_dvec - norm_svec;
    printf("norm        %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      2 norm\n");


    /* Dot operation */
    time = seconds();
    for (i = loops; i; i--) {
	dot_dvec = dot(dvec1, beg, end, dvec2);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	dot_svec = dot_float(svec1, beg, end, svec2);
    }
    time_svec = seconds() - time;

    diff = dot_dvec - dot_svec;
    printf("dot         %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      scalar product\n");


    /* Scadd operation */
    factor = 1.01;
    factor_float = factor;

    fac = factor;
    time = seconds();
    for (i = loops; i; i--) {
	scadd(dvec1, beg, end, fac, dvec2);
	fac = -fac;		/* to keep things in scale */
    }
    time_dvec = seconds() - time;

    fac_float = factor_float;
    time = seconds();
    for (i = loops; i; i--) {
	scadd_float(svec1, beg, end, fac_float, svec2);
	fac_float = -fac_float;	/* to keep things in scale */
    }
    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("scadd       %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      vec1 <- vec1 + alpha*vec2\n");


    /* Update operation */
    time = seconds();
    for (i = loops; i; i--) {
	update(dvec1, beg, end, dvec2, factor, dvec3);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	update_float(svec1, beg, end, svec2, factor_float, svec3);
    }
    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("update      %6.2f    %6.2f    %14.2g", time_dvec, time_svec, diff);
    printf("      vec1 <- vec2 + alpha*vec3\n");

    /* splarax operation */
    if (PERTURB) {
	if (NPERTURB > 0 && PERTURB_MAX > 0.0) {
	    perturb_init(n);
	    if (DEBUG_PERTURB > 0) {
		printf("Matrix being perturbed with scale %e\n", PERTURB_MAX);
	    }
	}
	else if (DEBUG_PERTURB > 0) {
	    printf("Matrix not being perturbed\n");
	}
    }

    time = seconds();
    for (i = loops; i; i--) {
	splarax(dvec1, A, n, dvec2, vwsqrt, dvec3);
    }
    time_dvec = seconds() - time;

    time = seconds();
    for (i = loops; i; i--) {
	splarax_float(svec1, A, n, svec2, vwsqrt_float, svec3);
    }

    time_svec = seconds() - time;

    diff = checkvec(dvec1, beg, end, svec1);
    printf("splarax     %6.2f    %6.2f    %14.5e", time_dvec, time_svec, diff);
    printf("      sparse matrix vector multiply\n");

    if (PERTURB && NPERTURB > 0 && PERTURB_MAX > 0.0) {
	perturb_clear();
    }
    printf("\n");

    /* Free memory */
    frvec(dvec1, 1);
    frvec(dvec2, 1);
    frvec(dvec3, 0);
    frvec_float(svec1, 1);
    frvec_float(svec2, 1);
    frvec_float(svec3, 0);
    if (vwsqrt_float != NULL) {
        frvec_float(vwsqrt_float, beg - 1);
    }
}
Esempio n. 8
0
/* Naive recombination of modular factors: combine up to maxK modular
 * factors, degree <= klim and divisible by hint
 *
 * target = polynomial we want to factor
 * famod = array of modular factors.  Product should be congruent to
 * target/lc(target) modulo p^a
 * For true factors: S1,S2 <= p^b, with b <= a and p^(b-a) < 2^31 */
static GEN
nfcmbf(nfcmbf_t *T, GEN p, long a, long maxK, long klim)
{
  GEN pol = T->pol, nf = T->nf, famod = T->fact, dn = T->dn;
  GEN bound = T->bound;
  GEN nfpol = gel(nf,1);
  long K = 1, cnt = 1, i,j,k, curdeg, lfamod = lg(famod)-1, dnf = degpol(nfpol);
  GEN res = cgetg(3, t_VEC);
  pari_sp av0 = avma;
  GEN pk = gpowgs(p,a), pks2 = shifti(pk,-1);

  GEN ind      = cgetg(lfamod+1, t_VECSMALL);
  GEN degpol   = cgetg(lfamod+1, t_VECSMALL);
  GEN degsofar = cgetg(lfamod+1, t_VECSMALL);
  GEN listmod  = cgetg(lfamod+1, t_COL);
  GEN fa       = cgetg(lfamod+1, t_COL);
  GEN lc = absi(leading_term(pol)), lt = is_pm1(lc)? NULL: lc;
  GEN C2ltpol, C = T->L->topowden, Tpk = T->L->Tpk;
  GEN Clt  = mul_content(C, lt);
  GEN C2lt = mul_content(C,Clt);
  const double Bhigh = get_Bhigh(lfamod, dnf);
  trace_data _T1, _T2, *T1, *T2;
  pari_timer ti;

  TIMERstart(&ti);

  if (maxK < 0) maxK = lfamod-1;

  C2ltpol = C2lt? gmul(C2lt,pol): pol;
  {
    GEN q = ceil_safe(sqrtr(T->BS_2));
    GEN t1,t2, ltdn, lt2dn;
    GEN trace1   = cgetg(lfamod+1, t_MAT);
    GEN trace2   = cgetg(lfamod+1, t_MAT);

    ltdn = mul_content(lt, dn);
    lt2dn= mul_content(ltdn, lt);

    for (i=1; i <= lfamod; i++)
    {
      pari_sp av = avma;
      GEN P = gel(famod,i);
      long d = degpol(P);

      degpol[i] = d; P += 2;
      t1 = gel(P,d-1);/* = - S_1 */
      t2 = gsqr(t1);
      if (d > 1) t2 = gsub(t2, gmul2n(gel(P,d-2), 1));
      /* t2 = S_2 Newton sum */
      t2 = typ(t2)!=t_INT? FpX_rem(t2, Tpk, pk): modii(t2, pk);
      if (lt)
      {
        if (typ(t2)!=t_INT) {
          t1 = FpX_red(gmul(ltdn, t1), pk);
          t2 = FpX_red(gmul(lt2dn,t2), pk);
        } else {
          t1 = remii(mulii(ltdn, t1), pk);
          t2 = remii(mulii(lt2dn,t2), pk);
        }
      }
      gel(trace1,i) = gclone( nf_bestlift(t1, NULL, T->L) );
      gel(trace2,i) = gclone( nf_bestlift(t2, NULL, T->L) ); avma = av;
    }
    T1 = init_trace(&_T1, trace1, T->L, q);
    T2 = init_trace(&_T2, trace2, T->L, q);
    for (i=1; i <= lfamod; i++) { 
      gunclone(gel(trace1,i));
      gunclone(gel(trace2,i));
    }
  }
  degsofar[0] = 0; /* sentinel */

  /* ind runs through strictly increasing sequences of length K,
   * 1 <= ind[i] <= lfamod */
nextK:
  if (K > maxK || 2*K > lfamod) goto END;
  if (DEBUGLEVEL > 3)
    fprintferr("\n### K = %d, %Z combinations\n", K,binomial(utoipos(lfamod), K));
  setlg(ind, K+1); ind[1] = 1;
  i = 1; curdeg = degpol[ind[1]];
  for(;;)
  { /* try all combinations of K factors */
    for (j = i; j < K; j++)
    {
      degsofar[j] = curdeg;
      ind[j+1] = ind[j]+1; curdeg += degpol[ind[j+1]];
    }
    if (curdeg <= klim && curdeg % T->hint == 0) /* trial divide */
    {
      GEN t, y, q, list;
      pari_sp av;

      av = avma;
      /* d - 1 test */
      if (T1)
      {
        t = get_trace(ind, T1);
        if (rtodbl(QuickNormL2(t,DEFAULTPREC)) > Bhigh)
        {
          if (DEBUGLEVEL>6) fprintferr(".");
          avma = av; goto NEXT;
        }
      }
      /* d - 2 test */
      if (T2)
      {
        t = get_trace(ind, T2);
        if (rtodbl(QuickNormL2(t,DEFAULTPREC)) > Bhigh)
        {
          if (DEBUGLEVEL>3) fprintferr("|");
          avma = av; goto NEXT;
        }
      }
      avma = av;
      y = lt; /* full computation */
      for (i=1; i<=K; i++)
      {
        GEN q = gel(famod, ind[i]);
        if (y) q = gmul(y, q);
        y = FqX_centermod(q, Tpk, pk, pks2);
      }
      y = nf_pol_lift(y, bound, T);
      if (!y)
      {
        if (DEBUGLEVEL>3) fprintferr("@");
        avma = av; goto NEXT;
      }
      /* try out the new combination: y is the candidate factor */
      q = RgXQX_divrem(C2ltpol, y, nfpol, ONLY_DIVIDES);
      if (!q)
      {
        if (DEBUGLEVEL>3) fprintferr("*");
        avma = av; goto NEXT;
      }

      /* found a factor */
      list = cgetg(K+1, t_VEC);
      gel(listmod,cnt) = list;
      for (i=1; i<=K; i++) list[i] = famod[ind[i]];

      y = Q_primpart(y);
      gel(fa,cnt++) = QXQX_normalize(y, nfpol);
      /* fix up pol */
      pol = q;
      for (i=j=k=1; i <= lfamod; i++)
      { /* remove used factors */
        if (j <= K && i == ind[j]) j++;
        else
        {
          famod[k] = famod[i];
          update_trace(T1, k, i);
          update_trace(T2, k, i);
          degpol[k] = degpol[i]; k++;
        }
      }
      lfamod -= K;
      if (lfamod < 2*K) goto END;
      i = 1; curdeg = degpol[ind[1]];

      if (C2lt) pol = Q_primpart(pol);
      if (lt) lt = absi(leading_term(pol));
      Clt  = mul_content(C, lt);
      C2lt = mul_content(C,Clt);
      C2ltpol = C2lt? gmul(C2lt,pol): pol;
      if (DEBUGLEVEL > 2)
      {
        fprintferr("\n"); msgTIMER(&ti, "to find factor %Z",y);
        fprintferr("remaining modular factor(s): %ld\n", lfamod);
      }
      continue;
    }

NEXT:
    for (i = K+1;;)
    {
      if (--i == 0) { K++; goto nextK; }
      if (++ind[i] <= lfamod - K + i)
      {
        curdeg = degsofar[i-1] + degpol[ind[i]];
        if (curdeg <= klim) break;
      }
    }
  }
END:
  if (degpol(pol) > 0)
  { /* leftover factor */
    if (signe(leading_term(pol)) < 0) pol = gneg_i(pol);

    if (C2lt && lfamod < 2*K) pol = QXQX_normalize(Q_primpart(pol), nfpol);
    setlg(famod, lfamod+1);
    gel(listmod,cnt) = shallowcopy(famod);
    gel(fa,cnt++) = pol;
  }
  if (DEBUGLEVEL>6) fprintferr("\n");
  if (cnt == 2) { 
    avma = av0; 
    gel(res,1) = mkvec(T->pol);
    gel(res,2) = mkvec(T->fact);
  }
  else
  {
    setlg(listmod, cnt); setlg(fa, cnt);
    gel(res,1) = fa;
    gel(res,2) = listmod;
    res = gerepilecopy(av0, res);
  }
  return res;
}
Esempio n. 9
0
/* return the factorization of the square-free polynomial x.
   The coeffs of x are in Z_nf and its leading term is a rational integer.
   deg(x) > 1, deg(nfpol) > 1
   If fl = 1, return only the roots of x in nf
   If fl = 2, as fl=1 if pol splits, [] otherwise */
static GEN
nfsqff(GEN nf, GEN pol, long fl)
{
  long n, nbf, dpol = degpol(pol);
  GEN pr, C0, polbase, init_fa = NULL;
  GEN N2, rep, polmod, polred, lt, nfpol = gel(nf,1);
  nfcmbf_t T;
  nflift_t L;
  pari_timer ti, ti_tot;

  if (DEBUGLEVEL>2) { TIMERstart(&ti); TIMERstart(&ti_tot); }
  n = degpol(nfpol);
  polbase = unifpol(nf, pol, t_COL);
  if (typ(polbase) != t_POL) pari_err(typeer, "nfsqff");
  polmod  = lift_intern( unifpol(nf, pol, t_POLMOD) );
  if (dpol == 1) return mkvec(QXQX_normalize(polmod, nfpol));
  /* heuristic */
  if (dpol*3 < n) 
  {
    GEN z, t;
    long i;
    if (DEBUGLEVEL>2) fprintferr("Using Trager's method\n");
    z = (GEN)polfnf(polmod, nfpol)[1];
    if (fl) {
      long l = lg(z);
      for (i = 1; i < l; i++)
      {
        t = gel(z,i); if (degpol(t) > 1) break;
        gel(z,i) = gneg(gdiv(gel(t,2), gel(t,3)));
      }
      setlg(z, i);
      if (fl == 2 && i != l) return cgetg(1,t_VEC);
    }
    return z;
  }

  nbf = nf_pick_prime(5, nf, polbase, fl, &lt, &init_fa, &pr, &L.Tp);
  if (fl == 2 && nbf < dpol) return cgetg(1,t_VEC);
  if (nbf <= 1)
  {
    if (!fl) return mkvec(QXQX_normalize(polmod, nfpol)); /* irreducible */
    if (!nbf) return cgetg(1,t_VEC); /* no root */
  }

  if (DEBUGLEVEL>2) {
    msgTIMER(&ti, "choice of a prime ideal");
    fprintferr("Prime ideal chosen: %Z\n", pr);
  }

  pol = simplify_i(lift(polmod));
  L.tozk = gel(nf,8);
  L.topow= Q_remove_denom(gel(nf,7), &L.topowden);
  T.ZC = L2_bound(nf, L.tozk, &(T.dn));
  T.Br = nf_root_bounds(pol, nf); if (lt) T.Br = gmul(T.Br, lt);

  if (fl) C0 = normlp(T.Br, 2, n);
  else    C0 = nf_factor_bound(nf, polbase); /* bound for T_2(Q_i), Q | P */
  T.bound = mulrr(T.ZC, C0); /* bound for |Q_i|^2 in Z^n on chosen Z-basis */

  N2 = mulsr(dpol*dpol, normlp(T.Br, 4, n)); /* bound for T_2(lt * S_2) */
  T.BS_2 = mulrr(T.ZC, N2); /* bound for |S_2|^2 on chosen Z-basis */

  if (DEBUGLEVEL>2) {
    msgTIMER(&ti, "bound computation");
    fprintferr("  1) T_2 bound for %s: %Z\n", fl?"root":"factor", C0);
    fprintferr("  2) Conversion from T_2 --> | |^2 bound : %Z\n", T.ZC);
    fprintferr("  3) Final bound: %Z\n", T.bound);
  }

  L.p = gel(pr,1);
  if (L.Tp && degpol(L.Tp) == 1) L.Tp = NULL;
  bestlift_init(0, nf, pr, T.bound, &L);
  if (DEBUGLEVEL>2) TIMERstart(&ti);
  polred = ZqX_normalize(polbase, lt, &L); /* monic */

  if (fl) {
    GEN z = nf_DDF_roots(pol, polred, nfpol, lt, init_fa, nbf, fl, &L);
    if (lg(z) == 1) return cgetg(1, t_VEC);
    return z;
  }

  {
    pari_sp av = avma;
    if (L.Tp)
      rep = FqX_split_all(init_fa, L.Tp, L.p);
    else
    {
      long d;
      rep = cgetg(dpol + 1, t_VEC); gel(rep,1) = FpX_red(polred,L.p);
      d = FpX_split_Berlekamp((GEN*)(rep + 1), L.p);
      setlg(rep, d + 1);
    }
    T.fact  = gerepilecopy(av, sort_vecpol(rep, &cmp_pol));
  }
  if (DEBUGLEVEL>2) msgTIMER(&ti, "splitting mod %Z", pr);
  T.pr = pr;
  T.L  = &L;
  T.polbase = polbase;
  T.pol   = pol;
  T.nf    = nf;
  T.hint  = 1; /* useless */

  rep = nf_combine_factors(&T, polred, L.p, L.k, dpol-1);
  if (DEBUGLEVEL>2)
    fprintferr("Total Time: %ld\n===========\n", TIMER(&ti_tot));
  return rep;
}
Esempio n. 10
0
/* Finds needed eigenvalues of tridiagonal T using either the QL algorithm
   or Sturm sequence bisection, whichever is predicted to be faster based
   on a simple complexity model. If one fails (which is rare), the other
   is tried. The return value is 0 if one of the routines succeeds. If they
   both fail, the return value is 1, and Lanczos should compute the best
   approximation it can based on previous iterations. */
int get_ritzvals(double *alpha,         /* vector of Lanczos scalars */
                 double *beta,          /* vector of Lanczos scalars */
                 int     j,             /* number of Lanczos iterations taken */
                 double  Anorm,         /* Gershgorin estimate */
                 double *workj,         /* work vector for Sturm sequence */
                 double *ritz,          /* array holding evals */
                 int     d,             /* problem dimension = num. eigenpairs needed */
                 int     left_goodlim,  /* number of ritz pairs checked on left end */
                 int     right_goodlim, /* number of ritz pairs checked on right end */
                 double  eigtol,        /* tolerance on eigenpair */
                 double  bis_safety     /* bisection tolerance function divisor */
                 )
{
  extern int DEBUG_EVECS;     /* debug flag for eigen computation */
  extern int WARNING_EVECS;   /* warning flag for eigen computation */
  int        nvals_left;      /* numb. evals to find on left end of spectrum */
  int        nvals_right;     /* numb. evals to find on right end of spectrum */
  double     bisection_tol;   /* width of interval bisection should converge to */
  int        pred_steps;      /* predicts # of required bisection steps per eval */
  int        tot_pred_steps;  /* predicts total # of required bisection steps */
  double *   ritz_sav = NULL; /* copy of ritzvals for debugging */
  int        bisect_flag;     /* return status of bisect() */
  int        ql_flag;         /* return status of ql() */
  int        local_debug;     /* whether to check bisection results with ql */
  int        bisect();        /* locates eigvals using bisection on Sturm seq. */
  int        ql();            /* computes eigenvalues of T using eispack algorithm */
  void       shell_sort();    /* sorts vector of eigenvalues */
  double *   mkvec();         /* to allocate a vector */
  void       frvec();         /* free vector */
  void       cpvec();         /* vector copy */
  void       bail();          /* our exit routine */
  void       strout();        /* string out to screen and output file */

  /* Determine number of ritzvals to find on left and right ends */
  nvals_left  = max(d, left_goodlim);
  nvals_right = min(j - nvals_left, right_goodlim);

  /* Estimate work for bisection vs. ql assuming bisection takes 5j flops per
     step, ql takes 30j^2 flops per call. (Ignore sorts, copies, addressing.) */

  bisection_tol  = eigtol * eigtol / bis_safety;
  pred_steps     = (log10(Anorm / bisection_tol) / log10(2.0)) + 1;
  tot_pred_steps = (nvals_left + nvals_right) * pred_steps;

  bisect_flag = ql_flag = 0;

  if (5 * tot_pred_steps < 30 * j) {
    if (DEBUG_EVECS > 2)
      printf("  tridiagonal solver: bisection\n");

    /* Set local_debug = TRUE for a table checking bisection against QL. */
    local_debug = FALSE;
    if (local_debug) {
      ritz_sav = mkvec(1, j);
      cpvec(ritz_sav, 1, j, alpha);
      cpvec(workj, 0, j, beta);
      ql_flag = ql(ritz_sav, workj, j);
      if (ql_flag != 0) {
        bail("Aborting debugging procedure in get_ritzvals().\n", 1);
      }
      shell_sort(j, &ritz_sav[1]);
    }

    bisect_flag = bisect(alpha, beta, j, Anorm, workj, ritz, nvals_left, nvals_right, bisection_tol,
                         ritz_sav, pred_steps + 10);

    if (local_debug)
      frvec(ritz_sav, 1);
  }

  else {
    if (DEBUG_EVECS > 2)
      printf("  tridiagonal solver: ql\n");
    cpvec(ritz, 1, j, alpha);
    cpvec(workj, 0, j, beta);
    ql_flag = ql(ritz, workj, j);
    shell_sort(j, &ritz[1]);
  }

  if (bisect_flag != 0 && ql_flag == 0) {
    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
      strout("WARNING: Sturm bisection of T failed; switching to QL.\n");
    }
    if (DEBUG_EVECS > 1 || WARNING_EVECS > 1) {
      if (bisect_flag == 1)
        strout("         - failure detected in sturmcnt().\n");
      if (bisect_flag == 2)
        strout("         - maximum number of bisection steps reached.\n");
    }
    cpvec(ritz, 1, j, alpha);
    cpvec(workj, 0, j, beta);
    ql_flag = ql(ritz, workj, j);
    shell_sort(j, &ritz[1]);
  }

  if (ql_flag != 0 && bisect_flag == 0) {
    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
      strout("WARNING: QL failed for T; switching to Sturm bisection.\n");
    }
    bisect_flag = bisect(alpha, beta, j, Anorm, workj, ritz, nvals_left, nvals_right, bisection_tol,
                         ritz_sav, pred_steps + 3);
  }

  if (bisect_flag != 0 && ql_flag != 0) {
    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
      return (1); /* can't recover; bail out with error code */
    }
  }

  return (0); /* ... things seem ok. */
}
Esempio n. 11
0
void 
lanczos_FO (
    struct vtx_data **A,		/* graph data structure */
    int n,			/* number of rows/colums in matrix */
    int d,			/* problem dimension = # evecs to find */
    double **y,			/* columns of y are eigenvectors of A  */
    double *lambda,		/* ritz approximation to eigenvals of A */
    double *bound,		/* on ritz pair approximations to eig pairs of A */
    double eigtol,		/* tolerance on eigenvectors */
    double *vwsqrt,		/* square root of vertex weights */
    double maxdeg,               /* maximum degree of graph */
    int version		/* 1 = standard mode, 2 = inverse operator mode */
)

{
    extern FILE *Output_File;	/* output file or NULL */
    extern int DEBUG_EVECS;	/* print debugging output? */
    extern int DEBUG_TRACE;	/* trace main execution path */
    extern int WARNING_EVECS;	/* print warning messages? */
    extern int LANCZOS_MAXITNS;         /* maximum Lanczos iterations allowed */
    extern double BISECTION_SAFETY;	/* safety factor for bisection algorithm */
    extern double SRESTOL;		/* resid tol for T evec comp */
    extern double DOUBLE_MAX;	/* Warning on inaccurate computation of evec of T */
    extern double splarax_time;	/* time matvecs */
    extern double orthog_time;	/* time orthogonalization work */
    extern double tevec_time;	/* time tridiagonal eigvec work */
    extern double evec_time;	/* time to generate eigenvectors */
    extern double ql_time;      /* time tridiagonal eigval work */
    extern double blas_time;	/* time for blas (not assembly coded) */
    extern double init_time;	/* time for allocating memory, etc. */
    extern double scan_time;	/* time for scanning bounds list */
    extern double debug_time;	/* time for debug computations and output */
    int       i, j;		/* indicies */
    int       maxj;		/* maximum number of Lanczos iterations */
    double   *u, *r;		/* Lanczos vectors */
    double   *Aq;		/* sparse matrix-vector product vector */
    double   *alpha, *beta;	/* the Lanczos scalars from each step */
    double   *ritz;		/* copy of alpha for tqli */
    double   *workj;		/* work vector (eg. for tqli) */
    double   *workn;		/* work vector (eg. for checkeig) */
    double   *s;		/* eigenvector of T */
    double  **q;		/* columns of q = Lanczos basis vectors */
    double   *bj;		/* beta(j)*(last element of evecs of T) */
    double    bis_safety;	/* real safety factor for bisection algorithm */
    double    Sres;		/* how well Tevec calculated eigvecs */
    double    Sres_max;		/* Maximum value of Sres */
    int       inc_bis_safety;	/* need to increase bisection safety */
    double   *Ares;		/* how well Lanczos calculated each eigpair */
    double   *inv_lambda;	/* eigenvalues of inverse operator */
    int      *index;		/* the Ritz index of an eigenpair */
    struct orthlink *orthlist  = NULL;	/* vectors to orthogonalize against in Lanczos */
    struct orthlink *orthlist2 = NULL;	/* vectors to orthogonalize against in Symmlq */
    struct orthlink *temp;	/* for expanding orthogonalization list */
    double   *ritzvec=NULL;	/* ritz vector for current iteration */
    double   *zeros=NULL;	/* vector of all zeros */
    double   *ones=NULL;	/* vector of all ones */
    struct scanlink *scanlist;	/* list of fields for min ritz vals */
    struct scanlink *curlnk;	/* for traversing the scanlist */
    double    bji_tol;		/* tol on bji estimate of A e-residual */
    int       converged;	/* has the iteration converged? */
    double    time;		/* current clock time */
    double    shift, rtol;		/* symmlq input */
    long      precon, goodb, nout;	/* symmlq input */
    long      checka, intlim;	/* symmlq input */
    double    anorm, acond;	/* symmlq output */
    double    rnorm, ynorm;	/* symmlq output */
    long      istop, itn;	/* symmlq output */
    double    macheps;		/* machine precision calculated by symmlq */
    double    normxlim;		/* a stopping criteria for symmlq */
    long      itnmin;		/* enforce minimum number of iterations */
    int       symmlqitns;	/* # symmlq itns */
    double   *wv1=NULL, *wv2=NULL, *wv3=NULL;	/* Symmlq work space */
    double   *wv4=NULL, *wv5=NULL, *wv6=NULL;	/* Symmlq work space */
    long      long_n;		/* long int copy of n for symmlq */
    int       ritzval_flag = 0;	/* status flag for ql() */
    double    Anorm;            /* Norm estimate of the Laplacian matrix */
    int       left, right;      /* ranges on the search for ritzvals */
    int       memory_ok;        /* TRUE as long as don't run out of memory */

    double   *mkvec();		/* allocates space for a vector */
    double   *mkvec_ret();      /* mkvec() which returns error code */
    double    dot();		/* standard dot product routine */
    struct orthlink *makeorthlnk();	/* make space for entry in orthog. set */
    double    ch_norm();		/* vector norm */
    double    Tevec();		/* calc evec of T by linear recurrence */
    struct scanlink *mkscanlist();	/* make scan list for min ritz vecs */
    double    lanc_seconds();	/* current clock timer */
    int       symmlq_(), get_ritzvals();
    void      setvec(), vecscale(), update(), vecran(), strout();
    void      splarax(), scanmin(), scanmax(), frvec(), orthogonalize();
    void      orthog1(), orthogvec(), bail(), warnings(), mkeigvecs();

    if (DEBUG_TRACE > 0) {
        printf("<Entering lanczos_FO>\n");
    }

    if (DEBUG_EVECS > 0) {
	if (version == 1) {
    	    printf("Full orthogonalization Lanczos, matrix size = %d\n", n);
	}
	else {
    	    printf("Full orthogonalization Lanczos, inverted operator, matrix size = %d\n", n);
	}
    }

    /* Initialize time. */
    time = lanc_seconds();

    if (n < d + 1) {
	bail("ERROR: System too small for number of eigenvalues requested.",1);
	/* d+1 since don't use zero eigenvalue pair */
    }

    /* Allocate Lanczos space. */
    maxj = LANCZOS_MAXITNS;
    u = mkvec(1, n);
    r = mkvec(1, n);
    Aq = mkvec(1, n);
    ritzvec = mkvec(1, n);
    zeros = mkvec(1, n);
    setvec(zeros, 1, n, 0.0);
    workn = mkvec(1, n);
    Ares = mkvec(1, d);
    inv_lambda = mkvec(1, d);
    index = smalloc((d + 1) * sizeof(int));
    alpha = mkvec(1, maxj);
    beta = mkvec(1, maxj + 1);
    ritz = mkvec(1, maxj);
    s = mkvec(1, maxj);
    bj = mkvec(1, maxj);
    workj = mkvec(1, maxj + 1);
    q = smalloc((maxj + 1) * sizeof(double *));
    scanlist = mkscanlist(d);

    if (version == 2) {
        /* Allocate Symmlq space all in one chunk. */
        wv1 = smalloc(6 * (n + 1) * sizeof(double));
        wv2 = &wv1[(n + 1)];
        wv3 = &wv1[2 * (n + 1)];
        wv4 = &wv1[3 * (n + 1)];
        wv5 = &wv1[4 * (n + 1)];
        wv6 = &wv1[5 * (n + 1)];

        /* Set invariant symmlq parameters */
        precon = FALSE;		/* FALSE until we figure out a good way */
        goodb = FALSE;		/* should be FALSE for this application */
        checka = FALSE;		/* if don't know by now, too bad */
        intlim = n;			/* set to enforce a maximum number of Symmlq itns */
        itnmin = 0;			/* set to enforce a minimum number of Symmlq itns */
        shift = 0.0;		/* since just solving rather than doing RQI */
        symmlqitns = 0;		/* total number of Symmlq iterations */
        nout = 0;			/* Effectively disabled - see notes in symmlq.f */
        rtol = 1.0e-5;		/* requested residual tolerance */
        normxlim = DOUBLE_MAX;	/* Effectively disables ||x|| termination criterion */
        long_n = n;			/* copy to long for linting */
    }

    /* Initialize. */
    vecran(r, 1, n);
    if (vwsqrt == NULL) {
	/* whack one's direction from initial vector */
	orthog1(r, 1, n);

	/* list the ones direction for later use in Symmlq */
	if (version == 2) {
	    orthlist2 = makeorthlnk();
	    ones = mkvec(1, n);
	    setvec(ones, 1, n, 1.0);
	    orthlist2->vec = ones;
	    orthlist2->pntr = NULL;
	}
    }
    else {
	/* whack vwsqrt direction from initial vector */
	orthogvec(r, 1, n, vwsqrt);

	if (version == 2) {
	    /* list the vwsqrt direction for later use in Symmlq */
	    orthlist2 = makeorthlnk();
	    orthlist2->vec = vwsqrt;
	    orthlist2->pntr = NULL;
	}
    }
    beta[1] = ch_norm(r, 1, n);
    q[0] = zeros;
    bji_tol = eigtol;
    orthlist = NULL;
    Sres_max = 0.0;
    Anorm = 2 * maxdeg;                         /* Gershgorin estimate for ||A|| */
    bis_safety = BISECTION_SAFETY;
    inc_bis_safety = FALSE;
    init_time += lanc_seconds() - time;

    /* Main Lanczos loop. */
    j = 1;
    converged = FALSE;
    memory_ok = TRUE;
    while ((j <= maxj) && (converged == FALSE) && memory_ok) {
	time = lanc_seconds();

	/* Allocate next Lanczos vector. If fail, back up one step and compute approx. eigvec. */
	q[j] = mkvec_ret(1, n);
        if (q[j] == NULL) {
	    memory_ok = FALSE;
  	    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
                strout("WARNING: Lanczos out of memory; computing best approximation available.\n");
            }
	    if (j <= 2) {
	        bail("ERROR: Sorry, can't salvage Lanczos.",1); 
  	        /* ... save yourselves, men.  */
	    }
            j--;
	}

	vecscale(q[j], 1, n, 1.0 / beta[j], r);
	blas_time += lanc_seconds() - time;
	time = lanc_seconds();
	if (version == 1) {
            splarax(Aq, A, n, q[j], vwsqrt, workn);
	}
	else {
	    symmlq_(&long_n, &(q[j][1]), &wv1[1], &wv2[1], &wv3[1], &wv4[1], &Aq[1], &wv5[1],
		&wv6[1], &checka, &goodb, &precon, &shift, &nout,
		&intlim, &rtol, &istop, &itn, &anorm, &acond,
		&rnorm, &ynorm, (double *) A, vwsqrt, (double *) orthlist2,
		&macheps, &normxlim, &itnmin);
	    symmlqitns += itn;
	    if (DEBUG_EVECS > 2) {
	        printf("Symmlq report:      rtol %g\n", rtol);
	        printf("  system norm %g, solution norm %g\n", anorm, ynorm);
	        printf("  system condition %g, residual %g\n", acond, rnorm);
	        printf("  termination condition %2ld, iterations %3ld\n", istop, itn);
	    }
	}
	splarax_time += lanc_seconds() - time;
	time = lanc_seconds();
	update(u, 1, n, Aq, -beta[j], q[j - 1]);
	alpha[j] = dot(u, 1, n, q[j]);
	update(r, 1, n, u, -alpha[j], q[j]);
	blas_time += lanc_seconds() - time;
	time = lanc_seconds();
	if (vwsqrt == NULL) {
	    orthog1(r, 1, n);
	}
	else {
	    orthogvec(r, 1, n, vwsqrt);
	}
	orthogonalize(r, n, orthlist);
	temp = orthlist;
	orthlist = makeorthlnk();
	orthlist->vec = q[j];
	orthlist->pntr = temp;
	beta[j + 1] = ch_norm(r, 1, n);
	orthog_time += lanc_seconds() - time;

	time = lanc_seconds();
	left = j/2;
	right = j - left + 1;
	if (inc_bis_safety) {
	    bis_safety *= 10;
	    inc_bis_safety = FALSE;
	}
	ritzval_flag = get_ritzvals(alpha, beta+1, j, Anorm, workj+1, 
                                    ritz, d, left, right, eigtol, bis_safety);
        /* ... have to off-set beta and workj since full orthogonalization
               indexes these from 1 to maxj+1 whereas selective orthog.
               indexes them from 0 to maxj */ 

	if (ritzval_flag != 0) {
            bail("ERROR: Both Sturm bisection and QL failed.",1);
	    /* ... give up. */
 	}
        ql_time += lanc_seconds() - time;

	/* Convergence check using Paige bji estimates. */
	time = lanc_seconds();
	for (i = 1; i <= j; i++) {
	    Sres = Tevec(alpha, beta, j, ritz[i], s);
	    if (Sres > Sres_max) {
		Sres_max = Sres;
	    }
	    if (Sres > SRESTOL) {
		inc_bis_safety = TRUE;
	    }
	    bj[i] = s[j] * beta[j + 1];
	}
	tevec_time += lanc_seconds() - time;


	time = lanc_seconds();
	if (version == 1) {
	    scanmin(ritz, 1, j, &scanlist);
	}
	else {
	    scanmax(ritz, 1, j, &scanlist);
	}
	converged = TRUE;
	if (j < d)
	    converged = FALSE;
	else {
	    curlnk = scanlist;
	    while (curlnk != NULL) {
		if (bj[curlnk->indx] > bji_tol) {
		    converged = FALSE;
		}
		curlnk = curlnk->pntr;
	    }
	}
	scan_time += lanc_seconds() - time;
	j++;
    }
    j--;

    /* Collect eigenvalue and bound information. */
    time = lanc_seconds();
    mkeigvecs(scanlist,lambda,bound,index,bj,d,&Sres_max,alpha,beta+1,j,s,y,n,q);
    evec_time += lanc_seconds() - time;

    /* Analyze computation for and report additional problems */
    time = lanc_seconds();
    if (DEBUG_EVECS>0 && version == 2) {
	printf("\nTotal Symmlq iterations %3d\n", symmlqitns);
    }
    if (version == 2) {
        for (i = 1; i <= d; i++) {
	    lambda[i] = 1.0/lambda[i];
	}
    }
    warnings(workn, A, y, n, lambda, vwsqrt, Ares, bound, index,
             d, j, maxj, Sres_max, eigtol, u, Anorm, Output_File);
    debug_time += lanc_seconds() - time;

    /* Free any memory allocated in this routine. */
    time = lanc_seconds();
    frvec(u, 1);
    frvec(r, 1);
    frvec(Aq, 1);
    frvec(ritzvec, 1);
    frvec(zeros, 1);
    if (vwsqrt == NULL && version == 2) {
	frvec(ones, 1);
    }
    frvec(workn, 1);
    frvec(Ares, 1);
    frvec(inv_lambda, 1);
    sfree(index);
    frvec(alpha, 1);
    frvec(beta, 1);
    frvec(ritz, 1);
    frvec(s, 1);
    frvec(bj, 1);
    frvec(workj, 1);
    if (version == 2) {
	frvec(wv1, 0);
    }
    while (scanlist != NULL) {
	curlnk = scanlist->pntr;
	sfree(scanlist);
	scanlist = curlnk;
    }
    for (i = 1; i <= j; i++) {
	frvec(q[i], 1);
    }
    while (orthlist != NULL) {
	temp = orthlist->pntr;
	sfree(orthlist);
	orthlist = temp;
    }
    while (version == 2 && orthlist2 != NULL) {
	temp = orthlist2->pntr;
	sfree(orthlist2);
	orthlist2 = temp;
    }
    sfree(q);
    init_time += lanc_seconds() - time;
}
int 
lanczos_ext_float (
    struct vtx_data **A,		/* sparse matrix in row linked list format */
    int n,			/* problem size */
    int d,			/* problem dimension = number of eigvecs to find */
    double **y,			/* columns of y are eigenvectors of A  */
    double eigtol,		/* tolerance on eigenvectors */
    double *vwsqrt,		/* square roots of vertex weights */
    double maxdeg,		/* maximum degree of graph */
    int version,		/* flags which version of sel. orth. to use */
    double *gvec,		/* the rhs n-vector in the extended eigen problem */
    double sigma		/* specifies the norm constraint on extended
				   eigenvector */
)
{
    extern FILE *Output_File;		/* output file or null */
    extern int LANCZOS_SO_INTERVAL;	/* interval between orthogonalizations */
    extern int LANCZOS_MAXITNS;         /* maximum Lanczos iterations allowed */
    extern int DEBUG_EVECS;		/* print debugging output? */
    extern int DEBUG_TRACE;		/* trace main execution path */
    extern int WARNING_EVECS;		/* print warning messages? */
    extern double BISECTION_SAFETY;	/* safety factor for T bisection */
    extern double SRESTOL;		/* resid tol for T evec comp */
    extern double DOUBLE_EPSILON;	/* machine precision */
    extern double DOUBLE_MAX;		/* largest double value */
    extern double splarax_time; /* time matvec */
    extern double orthog_time;  /* time orthogonalization work */
    extern double evec_time;    /* time to generate eigenvectors */
    extern double ql_time;      /* time tridiagonal eigenvalue work */
    extern double blas_time;    /* time for blas. linear algebra */
    extern double init_time;    /* time to allocate, intialize variables */
    extern double scan_time;    /* time for scanning eval and bound lists */
    extern double debug_time;   /* time for (some of) debug computations */
    extern double ritz_time;    /* time to generate ritz vectors */
    extern double pause_time;   /* time to compute whether to pause */
    int       i, j, k;		/* indicies */
    int       maxj;		/* maximum number of Lanczos iterations */
    float    *u, *r;		/* Lanczos vectors */
    double   *u_double;		/* double version of u */
    double   *alpha, *beta;	/* the Lanczos scalars from each step */
    double   *ritz;		/* copy of alpha for ql */
    double   *workj;		/* work vector, e.g. copy of beta for ql */
    float    *workn;		/* work vector, e.g. product Av for checkeig */
    double   *workn_double;	/* work vector, e.g. product Av for checkeig */
    double   *s;		/* eigenvector of T */
    float   **q;		/* columns of q are Lanczos basis vectors */
    double   *bj;		/* beta(j)*(last el. of corr. eigvec s of T) */
    double    bis_safety;	/* real safety factor for T bisection */
    double    Sres;		/* how well Tevec calculated eigvec s */
    double    Sres_max;		/* Max value of Sres */
    int       inc_bis_safety;	/* need to increase bisection safety */
    double   *Ares;		/* how well Lanczos calc. eigpair lambda,y */
    int      *index;		/* the Ritz index of an eigenpair */
    struct orthlink_float **solist;	/* vec. of structs with vecs. to orthog. against */
    struct scanlink *scanlist;		/* linked list of fields to do with min ritz vals */
    struct scanlink *curlnk;		/* for traversing the scanlist */
    double    bji_tol;		/* tol on bji est. of eigen residual of A */
    int       converged;	/* has the iteration converged? */
    double    goodtol;		/* error tolerance for a good Ritz vector */
    int       ngood;		/* total number of good Ritz pairs at current step */
    int       maxngood;		/* biggest val of ngood through current step */
    int       left_ngood;	/* number of good Ritz pairs on left end */
    int       lastpause;	/* Most recent step with good ritz vecs */
    int       nopauses;		/* Have there been any pauses? */
    int       interval;		/* number of steps between pauses */
    double    time;             /* Current clock time */
    int       left_goodlim;	/* number of ritz pairs checked on left end */
    double    Anorm;		/* Norm estimate of the Laplacian matrix */
    int       pausemode;	/* which Lanczos pausing criterion to use */
    int       pause;		/* whether to pause */
    int       temp;		/* used to prevent redundant index computations */
    double   *extvec;		/* n-vector solving the extended A eigenproblem */
    double   *v;		/* j-vector solving the extended T eigenproblem */
    double    extval=0.0;	/* computed extended eigenvalue (of both A and T) */
    double   *work1, *work2;    /* work vectors */
    double    check;		/* to check an orthogonality condition */
    double    numerical_zero;	/* used for zero in presense of round-off  */
    int       ritzval_flag;	/* status flag for get_ritzvals() */
    double    resid;		/* residual */
    int       memory_ok;	/* TRUE until memory runs out */
    float    *vwsqrt_float = NULL;     /* float version of vwsqrt */

    struct orthlink_float *makeorthlnk_float();	/* makes space for new entry in orthog. set */
    struct scanlink *mkscanlist();		/* init scan list for min ritz vecs */
    double   *mkvec();			/* allocates space for a vector */
    float    *mkvec_float();		/* allocates space for a vector */
    float    *mkvec_ret_float();	/* mkvec() which returns error code */
    double    dot_float();		/* standard dot product routine */
    double    ch_norm();			/* vector norm */
    double    norm_float();		/* vector norm */
    double    Tevec();			/* calc eigenvector of T by linear recurrence */
    double    lanc_seconds();   	/* switcheable timer */
          	/* free allocated memory safely */
    int       lanpause_float();      	/* figure when to pause Lanczos iteration */
    int       get_ritzvals();   	/* compute eigenvalues of T */
    void      setvec();         	/* initialize a vector */
    void      setvec_float();         	/* initialize a vector */
    void      vecscale_float();     	/* scale a vector */
    void      splarax();        	/* matrix vector multiply */
    void      splarax_float();        	/* matrix vector multiply */
    void      update_float();         	/* add scalar multiple of a vector to another */
    void      sorthog_float();        	/* orthogonalize vector against list of others */
    void      bail();           	/* our exit routine */
    void      scanmin();        	/* store small values of vector in linked list */
    void      frvec();         		/* free vector */
    void      frvec_float();          	/* free vector */
    void      scadd();          	/* add scalar multiple of vector to another */
    void      scadd_float();          	/* add scalar multiple of vector to another */
    void      scadd_mixed();          	/* add scalar multiple of vector to another */
    void      orthog1_float();        	/* efficiently orthog. against vector of ones */
    void      solistout_float();      	/* print out orthogonalization list */
    void      doubleout();      	/* print a double precision number */
    void      orthogvec_float();      	/* orthogonalize one vector against another */
    void      double_to_float();	/* copy a double vector to a float vector */
    void      get_extval();		/* find extended Ritz values */
    void      scale_diag();		/* scale vector by diagonal matrix */
    void      scale_diag_float();	/* scale vector by diagonal matrix */
    void      strout();			/* print string to screen and file */

    if (DEBUG_TRACE > 0) {
	printf("<Entering lanczos_ext_float>\n");
    }

    if (DEBUG_EVECS > 0) {
	printf("Selective orthogonalization Lanczos for extended eigenproblem, matrix size = %d.\n", n);
    }

    /* Initialize time. */
    time = lanc_seconds();

    if (d != 1) {
	bail("ERROR: Extended Lanczos only available for bisection.",1);
        /* ... something must be wrong upstream. */
    }

    if (n < d + 1) {
	bail("ERROR: System too small for number of eigenvalues requested.",1);
	/* ... d+1 since don't use zero eigenvalue pair */
    }

    /* Allocate space. */
    maxj = LANCZOS_MAXITNS;
    u = mkvec_float(1, n);
    u_double = mkvec(1, n);
    r = mkvec_float(1, n);
    workn = mkvec_float(1, n);
    workn_double = mkvec(1, n);
    Ares = mkvec(0, d);
    index = smalloc((d + 1) * sizeof(int));
    alpha = mkvec(1, maxj);
    beta = mkvec(0, maxj);
    ritz = mkvec(1, maxj);
    s = mkvec(1, maxj);
    bj = mkvec(1, maxj);
    workj = mkvec(0, maxj);
    q = smalloc((maxj + 1) * sizeof(float *));
    solist = smalloc((maxj + 1) * sizeof(struct orthlink_float *));
    scanlist = mkscanlist(d);
    extvec = mkvec(1, n);
    v = mkvec(1, maxj);
    work1 = mkvec(1, maxj);
    work2 = mkvec(1, maxj);

    /* Set some constants governing orthogonalization */
    ngood = 0;
    maxngood = 0;
    bji_tol = eigtol;
    Anorm = 2 * maxdeg;				/* Gershgorin estimate for ||A|| */
    goodtol = Anorm * sqrt(DOUBLE_EPSILON);	/* Parlett & Scott's bound, p.224 */
    interval = 2 + (int) min(LANCZOS_SO_INTERVAL - 2, n / (2 * LANCZOS_SO_INTERVAL));
    bis_safety = BISECTION_SAFETY;
    numerical_zero = 1.0e-6;

    if (DEBUG_EVECS > 0) {
	printf("  maxdeg %g\n", maxdeg);
	printf("  goodtol %g\n", goodtol);
	printf("  interval %d\n", interval);
        printf("  maxj %d\n", maxj);
    }

    /* Make a float copy of vwsqrt */
    if (vwsqrt != NULL) {
      vwsqrt_float = mkvec_float(0,n);
      double_to_float(vwsqrt_float,1,n,vwsqrt);
    }

    /* Initialize space. */
    double_to_float(r,1,n,gvec);
    if (vwsqrt_float != NULL) {
	scale_diag_float(r,1,n,vwsqrt_float);
    }
    check = norm_float(r,1,n);
    if (vwsqrt_float == NULL) {
	orthog1_float(r, 1, n);
    }
    else { 
	orthogvec_float(r, 1, n, vwsqrt_float);
    }
    check = fabs(check - norm_float(r,1,n));
    if (check > 10*numerical_zero && WARNING_EVECS > 0) {
	strout("WARNING: In terminal propagation, rhs should have no component in the"); 
        printf("         nullspace of the Laplacian, so check val %g should be zero.\n", check); 
	if (Output_File != NULL) {
            fprintf(Output_File,
		"         nullspace of the Laplacian, so check val %g should be zero.\n",
	    check); 
	}
    }
    beta[0] = norm_float(r, 1, n);
    q[0] = mkvec_float(1, n);
    setvec_float(q[0], 1, n, 0.0);
    setvec(bj, 1, maxj, DOUBLE_MAX);

    if (beta[0] < numerical_zero) {
     /* The rhs vector, Dg, of the transformed problem is numerically zero or is
	in the null space of the Laplacian, so this is not a well posed extended
	eigenproblem. Set maxj to zero to force a quick exit but still clean-up
	memory and return(1) to indicate to eigensolve that it should call the
	default eigensolver routine for the standard eigenproblem. */
	maxj = 0;
    }
	
    /* Main Lanczos loop. */
    j = 1;
    lastpause = 0;
    pausemode = 1;
    left_ngood = 0;
    left_goodlim = 0;
    converged = FALSE;
    Sres_max = 0.0;
    inc_bis_safety = FALSE;
    nopauses = TRUE;
    memory_ok = TRUE;
    init_time += lanc_seconds() - time;
    while ((j <= maxj) && (!converged) && memory_ok) {
        time = lanc_seconds();

	/* Allocate next Lanczos vector. If fail, back up to last pause. */
	q[j] = mkvec_ret_float(1, n);
        if (q[j] == NULL) {
	    memory_ok = FALSE;
  	    if (DEBUG_EVECS > 0 || WARNING_EVECS > 0) {
                strout("WARNING: Lanczos_ext out of memory; computing best approximation available.\n");
            }
	    if (nopauses) {
	        bail("ERROR: Sorry, can't salvage Lanczos_ext.",1); 
  	        /* ... save yourselves, men.  */
	    }
    	    for (i = lastpause+1; i <= j-1; i++) {
	        frvec_float(q[i], 1);
    	    }
            j = lastpause;
	}

        /* Basic Lanczos iteration */
	vecscale_float(q[j], 1, n, (float)(1.0 / beta[j - 1]), r);
        blas_time += lanc_seconds() - time;
        time = lanc_seconds(); 
	splarax_float(u, A, n, q[j], vwsqrt_float, workn);
        splarax_time += lanc_seconds() - time;
        time = lanc_seconds();
	update_float(r, 1, n, u, (float)(-beta[j - 1]), q[j - 1]);
	alpha[j] = dot_float(r, 1, n, q[j]);
	update_float(r, 1, n, r, (float)(-alpha[j]), q[j]);
        blas_time += lanc_seconds() - time;

        /* Selective orthogonalization */
        time = lanc_seconds();
	if (vwsqrt_float == NULL) {
	    orthog1_float(r, 1, n);
	}
	else {
	    orthogvec_float(r, 1, n, vwsqrt_float);
	}
	if ((j == (lastpause + 1)) || (j == (lastpause + 2))) {
	    sorthog_float(r, n, solist, ngood);
	}
        orthog_time += lanc_seconds() - time;
	beta[j] = norm_float(r, 1, n);
        time = lanc_seconds();
	pause = lanpause_float(j, lastpause, interval, q, n, &pausemode, version, beta[j]);
        pause_time += lanc_seconds() - time;
	if (pause) {
	    nopauses = FALSE;
	    lastpause = j;

	    /* Compute limits for checking Ritz pair convergence. */
	    if (version == 2) {
		if (left_ngood + 2 > left_goodlim) {
		    left_goodlim = left_ngood + 2;
		}
	    }

	    /* Special case: need at least d Ritz vals on left. */
	    left_goodlim = max(left_goodlim, d);

	    /* Special case: can't find more than j total Ritz vals. */
	    if (left_goodlim > j) {
		left_goodlim = min(left_goodlim, j);
	    }

	    /* Find Ritz vals using faster of Sturm bisection or ql. */
            time = lanc_seconds();
	    if (inc_bis_safety) {
		bis_safety *= 10;
		inc_bis_safety = FALSE;
	    }
	    ritzval_flag = get_ritzvals(alpha, beta, j, Anorm, workj, ritz, d,
			 left_goodlim, 0, eigtol, bis_safety);
            ql_time += lanc_seconds() - time;

	    if (ritzval_flag != 0) {
                bail("ERROR: Lanczos_ext failed in computing eigenvalues of T.",1);
		/* ... we recover from this in lanczos_SO, but don't worry here. */ 
	    }

	    /* Scan for minimum evals of tridiagonal. */
            time = lanc_seconds();
	    scanmin(ritz, 1, j, &scanlist);
            scan_time += lanc_seconds() - time;

	    /* Compute Ritz pair bounds at left end. */
            time = lanc_seconds();
	    setvec(bj, 1, j, 0.0);
	    for (i = 1; i <= left_goodlim; i++) {
		Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
		if (Sres > Sres_max) {
		    Sres_max = Sres;
		}
		if (Sres > SRESTOL) {
		    inc_bis_safety = TRUE;
		}
		bj[i] = s[j] * beta[j];
	    }
            ritz_time += lanc_seconds() - time;

	    /* Show the portion of the spectrum checked for convergence. */
	    if (DEBUG_EVECS > 2) {
                time = lanc_seconds();
		printf("\nindex         Ritz vals            bji bounds\n");
		for (i = 1; i <= left_goodlim; i++) {
		    printf("  %3d", i);
		    doubleout(ritz[i], 1);
		    doubleout(bj[i], 1);
		    printf("\n");
		}
		printf("\n");
		curlnk = scanlist;
		while (curlnk != NULL) {
		    temp = curlnk->indx;
		    if ((temp > left_goodlim) && (temp < j)) {
			printf("  %3d", temp);
			doubleout(ritz[temp], 1);
			doubleout(bj[temp], 1);
			printf("\n");
		    }
		    curlnk = curlnk->pntr;
		}
		printf("                            -------------------\n");
		printf("                goodtol:    %19.16f\n\n", goodtol);
                debug_time += lanc_seconds() - time;
	    }

	    get_extval(alpha, beta, j, ritz[1], s, eigtol, beta[0], sigma, &extval,
		v, work1, work2);

	    /* check convergence of Ritz pairs */
            time = lanc_seconds();
	    converged = TRUE;
	    if (j < d)
		converged = FALSE;
	    else {
		curlnk = scanlist;
		while (curlnk != NULL) {
		    if (bj[curlnk->indx] > bji_tol) {
			converged = FALSE;
		    }
		    curlnk = curlnk->pntr;
		}
	    }
            scan_time += lanc_seconds() - time;

	    if (!converged) {
		ngood = 0;
		left_ngood = 0;	/* for setting left_goodlim on next loop */

		/* Compute converged Ritz pairs on left end */
                time = lanc_seconds();
		for (i = 1; i <= left_goodlim; i++) {
		    if (bj[i] <= goodtol) {
			ngood += 1;
			left_ngood += 1;
			if (ngood > maxngood) {
			    maxngood = ngood;
			    solist[ngood] = makeorthlnk_float();
			    (solist[ngood])->vec = mkvec_float(1, n);
			}
			(solist[ngood])->index = i;
			Sres = Tevec(alpha, beta - 1, j, ritz[i], s);
			if (Sres > Sres_max) {
			    Sres_max = Sres;
			}
			if (Sres > SRESTOL) {
			    inc_bis_safety = TRUE;
			}
			setvec_float((solist[ngood])->vec, 1, n, 0.0);
			for (k = 1; k <= j; k++) {
			    scadd_float((solist[ngood])->vec, 1, n, s[k], q[k]);
			}
		    }
		}
                ritz_time += lanc_seconds() - time;

		if (DEBUG_EVECS > 2) {
                    time = lanc_seconds();
		    printf("  j %3d; goodlim lft %2d, rgt %2d; list ",
			   j, left_goodlim, 0);
		    solistout_float(solist, n, ngood, j);
                    printf("---------------------end of iteration---------------------\n\n");
                    debug_time += lanc_seconds() - time;
		}
	    }
	}
	j++;
    }
    j--;

    if (DEBUG_EVECS > 0) {
        time = lanc_seconds();
	if (maxj == 0) {
	    printf("Not extended eigenproblem -- calling ordinary eigensolver.\n");
	}
	else {
	    printf("  Lanczos_ext itns: %d\n",j);
	    printf("  eigenvalue: %g\n",ritz[1]);
	    printf("  extended eigenvalue: %g\n",extval);
	}
       debug_time += lanc_seconds() - time;
    }

    if (maxj != 0) {
        /* Compute (scaled) extended eigenvector. */
        time = lanc_seconds(); 
        setvec(y[1], 1, n, 0.0);
        for (k = 1; k <= j; k++) {
            scadd_mixed(y[1], 1, n, v[k], q[k]);
        }
        evec_time += lanc_seconds() - time;
        /* Note: assign() will scale this y vector back to x (since y = Dx) */ 

       /* Compute and check residual directly. Use the Ay = extval*y + Dg version of
          the problem for convenience. Note that u and v are used here as workspace */ 
        time = lanc_seconds(); 
        splarax(workn_double, A, n, y[1], vwsqrt, u_double);
        scadd(workn_double, 1, n, -extval, y[1]);
        scale_diag(gvec,1,n,vwsqrt);
        scadd(workn_double, 1, n, -1.0, gvec);
        resid = ch_norm(workn_double, 1, n);
        if (DEBUG_EVECS > 0) {
	    printf("  extended residual: %g\n",resid);
	    if (Output_File != NULL) {
	        fprintf(Output_File, "  extended residual: %g\n",resid);
	    }
	}
	if (WARNING_EVECS > 0 && resid > eigtol) {
	    printf("WARNING: Extended residual (%g) greater than tolerance (%g).\n",
		resid, eigtol);
	    if (Output_File != NULL) {
                fprintf(Output_File,
		    "WARNING: Extended residual (%g) greater than tolerance (%g).\n",
		    resid, eigtol);
	    }
	}
        debug_time += lanc_seconds() - time;
    } 


    /* free up memory */
    time = lanc_seconds();
    frvec_float(u, 1);
    frvec(u_double, 1);
    frvec_float(r, 1);
    frvec_float(workn, 1);
    frvec(workn_double, 1);
    frvec(Ares, 0);
    sfree(index);
    frvec(alpha, 1);
    frvec(beta, 0);
    frvec(ritz, 1);
    frvec(s, 1);
    frvec(bj, 1);
    frvec(workj, 0);
    for (i = 0; i <= j; i++) {
	frvec_float(q[i], 1);
    }
    sfree(q);
    while (scanlist != NULL) {
	curlnk = scanlist->pntr;
	sfree(scanlist);
	scanlist = curlnk;
    }
    for (i = 1; i <= maxngood; i++) {
	frvec_float((solist[i])->vec, 1);
	sfree(solist[i]);
    }
    sfree(solist);
    frvec(extvec, 1);
    frvec(v, 1);
    frvec(work1, 1);
    frvec(work2, 1);
    if (vwsqrt != NULL)
      frvec_float(vwsqrt_float, 1);
    
    init_time += lanc_seconds() - time;

    if (maxj == 0) return(1);  /* see note on beta[0] and maxj above */
    else return(0);
}
Esempio n. 13
0
/* g = polynomial, h = embedding. Return [[g,h]] */
static GEN
_subfield(GEN g, GEN h) { return mkvec(mkvec2(g,h)); }