Exemplo n.º 1
0
/* return P(X + c) using destructive Horner, optimize for c = 1,-1 */
GEN
translate_pol(GEN P, GEN c)
{
  pari_sp av = avma, lim;
  GEN Q, *R;
  long i, k, n;

  if (!signe(P) || gcmp0(c)) return gcopy(P);
  Q = shallowcopy(P);
  R = (GEN*)(Q+2); n = degpol(P);
  lim = stack_lim(av, 2);
  if (gcmp1(c))
  {
    for (i=1; i<=n; i++)
    {
      for (k=n-i; k<n; k++) R[k] = gadd(R[k], R[k+1]);
      if (low_stack(lim, stack_lim(av,2)))
      {
        if(DEBUGMEM>1) pari_warn(warnmem,"TR_POL(1), i = %ld/%ld", i,n);
        Q = gerepilecopy(av, Q); R = (GEN*)Q+2;
      }
    }
  }
  else if (gcmp_1(c))
  {
    for (i=1; i<=n; i++)
    {
      for (k=n-i; k<n; k++) R[k] = gsub(R[k], R[k+1]);
      if (low_stack(lim, stack_lim(av,2)))
      {
        if(DEBUGMEM>1) pari_warn(warnmem,"TR_POL(-1), i = %ld/%ld", i,n);
        Q = gerepilecopy(av, Q); R = (GEN*)Q+2;
      }
    }
  }
  else
  {
    for (i=1; i<=n; i++)
    {
      for (k=n-i; k<n; k++) R[k] = gadd(R[k], gmul(c, R[k+1]));
      if (low_stack(lim, stack_lim(av,2)))
      {
        if(DEBUGMEM>1) pari_warn(warnmem,"TR_POL, i = %ld/%ld", i,n);
        Q = gerepilecopy(av, Q); R = (GEN*)Q+2;
      }
    }
  }
  return gerepilecopy(av, Q);
}
Exemplo n.º 2
0
// Digit reversal
GEN
rev(GEN n, long B)
{
    pari_sp av = avma;
    if (typ(n) != t_INT)
        pari_err_TYPE("rev", n);
    GEN m = modis(n, B);
    n = divis(n, B);
    
    pari_sp btop = avma, st_lim = stack_lim(btop, 1);
    while (signe(n)) {
        m = addis(mulis(m, B), smodis(n, B));
        n = divis(n, B);
        if (low_stack(st_lim, stack_lim(btop, 1)))
            gerepileall(btop, 2, &m, &n);
    }
    m = gerepilecopy(av, m);
    return m;
}
Exemplo n.º 3
0
int
ratlift(GEN x, GEN m, GEN *a, GEN *b, GEN amax, GEN bmax)
{
  GEN d,d1,v,v1,q,r;
  pari_sp av = avma, av1, lim;
  long lb,lr,lbb,lbr,s,s0;
  ulong vmax;
  ulong xu,xu1,xv,xv1;		/* Lehmer stage recurrence matrix */
  int lhmres;			/* Lehmer stage return value */

  if ((typ(x) | typ(m) | typ(amax) | typ(bmax)) != t_INT) pari_err(arither1);
  if (signe(bmax) <= 0)
    pari_err(talker, "ratlift: bmax must be > 0, found\n\tbmax=%Z\n", bmax);
  if (signe(amax) < 0)
    pari_err(talker, "ratilft: amax must be >= 0, found\n\tamax=%Z\n", amax);
  /* check 2*amax*bmax < m */
  if (cmpii(shifti(mulii(amax, bmax), 1), m) >= 0)
    pari_err(talker, "ratlift: must have 2*amax*bmax < m, found\n\tamax=%Z\n\tbmax=%Z\n\tm=%Z\n", amax,bmax,m);
  /* we _could_ silently replace x with modii(x,m) instead of the following,
   * but let's leave this up to the caller
   */
  avma = av; s = signe(x);
  if (s < 0 || cmpii(x,m) >= 0)
    pari_err(talker, "ratlift: must have 0 <= x < m, found\n\tx=%Z\n\tm=%Z\n", x,m);

  /* special cases x=0 and/or amax=0 */
  if (s == 0)
  {
    if (a != NULL) *a = gen_0;
    if (b != NULL) *b = gen_1;
    return 1;
  }
  else if (signe(amax)==0)
    return 0;
  /* assert: m > x > 0, amax > 0 */

  /* check here whether a=x, b=1 is a solution */
  if (cmpii(x,amax) <= 0)
  {
    if (a != NULL) *a = icopy(x);
    if (b != NULL) *b = gen_1;
    return 1;
  }

  /* There is no special case for single-word numbers since this is
   * mainly meant to be used with large moduli.
   */
  (void)new_chunk(lgefint(bmax) + lgefint(amax)); /* room for a,b */
  d = m; d1 = x;
  v = gen_0; v1 = gen_1;
  /* assert d1 > amax, v1 <= bmax here */
  lb = lgefint(bmax);
  lbb = bfffo(*int_MSW(bmax));
  s = 1;
  av1 = avma; lim = stack_lim(av, 1);

  /* general case: Euclidean division chain starting with m div x, and
   * with bounds on the sequence of convergents' denoms v_j.
   * Just to be different from what invmod and bezout are doing, we work
   * here with the all-nonnegative matrices [u,u1;v,v1]=prod_j([0,1;1,q_j]).
   * Loop invariants:
   * (a) (sign)*[-v,v1]*x = [d,d1] (mod m)  (componentwise)
   * (sign initially +1, changes with each Euclidean step)
   * so [a,b] will be obtained in the form [-+d,v] or [+-d1,v1];
   * this congruence is a consequence of
   * (b) [x,m]~ = [u,u1;v,v1]*[d1,d]~,
   * where u,u1 is the usual numerator sequence starting with 1,0
   * instead of 0,1  (just multiply the eqn on the left by the inverse
   * matrix, which is det*[v1,-u1;-v,u], where "det" is the same as the
   * "(sign)" in (a)).  From m = v*d1 + v1*d and
   * (c) d > d1 >= 0, 0 <= v < v1,
   * we have d >= m/(2*v1), so while v1 remains smaller than m/(2*amax),
   * the pair [-(sign)*d,v] satisfies (1) but violates (2) (d > amax).
   * Conversely, v1 > bmax indicates that no further solutions will be
   * forthcoming;  [-(sign)*d,v] will be the last, and first, candidate.
   * Thus there's at most one point in the chain division where a solution
   * can live:  v < bmax, v1 >= m/(2*amax) > bmax,  and this is acceptable
   * iff in fact d <= amax  (e.g. m=221, x=34 or 35, amax=bmax=10 fail on
   * this count while x=32,33,36,37 succeed).  However, a division may leave
   * a zero residue before we ever reach this point  (consider m=210, x=35,
   * amax=bmax=10),  and our caller may find that gcd(d,v) > 1  (numerous
   * examples -- keep m=210 and consider any of x=29,31,32,33,34,36,37,38,
   * 39,40,41).
   * Furthermore, at the start of the loop body we have in fact
   * (c') 0 <= v < v1 <= bmax, d > d1 > amax >= 0,
   * (and are never done already).
   *
   * Main loop is similar to those of invmod() and bezout(), except for
   * having to determine appropriate vmax bounds, and checking termination
   * conditions.  The signe(d1) condition is only for paranoia
   */
  while (lgefint(d) > 3 && signe(d1))
  {
    /* determine vmax for lgcdii so as to ensure v won't overshoot.
     * If v+v1 > bmax, the next step would take v1 beyond the limit, so
     * since [+-d1,v1] is not a solution, we give up.  Otherwise if v+v1
     * is way shorter than bmax, use vmax=MAXULUNG.  Otherwise, set vmax
     * to a crude lower approximation of bmax/(v+v1), or to 1, which will
     * allow the inner loop to do one step
     */
    r = addii(v,v1);
    lr = lb - lgefint(r);
    lbr = bfffo(*int_MSW(r));
    if (cmpii(r,bmax) > 0)	/* done, not found */
    {
      avma = av;
      return 0;
    }
    else if (lr > 1)		/* still more than a word's worth to go */
    {
      vmax = MAXULONG;
    }
    else			/* take difference of bit lengths */
    {
      lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
      if ((ulong)lr > BITS_IN_LONG)
	vmax = MAXULONG;
      else if (lr == 0)
	vmax = 1UL;
      else
	vmax = 1UL << (lr-1);
      /* the latter is pessimistic but faster than a division */
    }
    /* do a Lehmer-Jebelean round */
    lhmres = lgcdii((ulong *)d, (ulong *)d1, &xu, &xu1, &xv, &xv1, vmax);
    if (lhmres != 0)		/* check progress */
    {				/* apply matrix */
      if ((lhmres == 1) || (lhmres == -1))
      {
	s = -s;
	if (xv1 == 1)
	{
	  /* re-use v+v1 computed above */
	  v=v1; v1=r;
	  r = subii(d,d1); d=d1; d1=r;
	}
	else
	{
	  r = subii(d, mului(xv1,d1)); d=d1; d1=r;
	  r = addii(v, mului(xv1,v1)); v=v1; v1=r;
	}
      }
      else
      {
	r  = subii(muliu(d,xu),  muliu(d1,xv));
	d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
	r  = addii(muliu(v,xu),  muliu(v1,xv));
	v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
        if (lhmres&1)
	{
          setsigne(d,-signe(d));
	  s = -s;
        }
        else if (signe(d1))
	{
          setsigne(d1,-signe(d1));
        }
      }
      /* check whether we're done.  Assert v <= bmax here.  Examine v1:
       * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
       * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise
       * proceed.
       */
      if (cmpii(v1,bmax) > 0) /* certainly done */
      {
	avma = av;
	if (cmpii(d,amax) <= 0) /* done, found */
	{
	  if (a != NULL)
	  {
	    *a = icopy(d);
	    setsigne(*a,-s);	/* sign opposite to s */
	  }
	  if (b != NULL) *b = icopy(v);
	  return 1;
	}
	else			/* done, not found */
	  return 0;
      }
      else if (cmpii(d1,amax) <= 0) /* also done, found */
      {
	avma = av;
	if (a != NULL)
	{
	  if (signe(d1))
	  {
	    *a = icopy(d1);
	    setsigne(*a,s);	/* same sign as s */
	  }
	  else
	    *a = gen_0;
	}
	if (b != NULL) *b = icopy(v1);
	return 1;
      }
    } /* lhmres != 0 */

    if (lhmres <= 0 && signe(d1))
    {
      q = dvmdii(d,d1,&r);
#ifdef DEBUG_LEHMER
      fprintferr("Full division:\n");
      printf("  q = "); output(q); sleep (1);
#endif
      d=d1; d1=r;
      r = addii(v,mulii(q,v1));
      v=v1; v1=r;
      s = -s;
      /* check whether we are done now.  Since we weren't before the div, it
       * suffices to examine v1 and d1 -- the new d (former d1) cannot cut it
       */
      if (cmpii(v1,bmax) > 0) /* done, not found */
      {
	avma = av;
	return 0;
      }
      else if (cmpii(d1,amax) <= 0) /* done, found */
      {
	avma = av;
	if (a != NULL)
	{
	  if (signe(d1))
	  {
	    *a = icopy(d1);
	    setsigne(*a,s);	/* same sign as s */
	  }
	  else
	    *a = gen_0;
	}
	if (b != NULL) *b = icopy(v1);
	return 1;
      }
    }

    if (low_stack(lim, stack_lim(av,1)))
    {
      GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
      if(DEBUGMEM>1) pari_warn(warnmem,"ratlift");
      gerepilemany(av1,gptr,4);
    }
  } /* end while */

  /* Postprocessing - final sprint.  Since we usually underestimate vmax,
   * this function needs a loop here instead of a simple conditional.
   * Note we can only get here when amax fits into one word  (which will
   * typically not be the case!).  The condition is bogus -- d1 is never
   * zero at the start of the loop.  There will be at most a few iterations,
   * so we don't bother collecting garbage
   */
  while (signe(d1))
  {
    /* Assertions: lgefint(d)==lgefint(d1)==3.
     * Moreover, we aren't done already, or we would have returned by now.
     * Recompute vmax...
     */
#ifdef DEBUG_RATLIFT
    fprintferr("rl-fs: d,d1=%Z,%Z\n", d, d1);
    fprintferr("rl-fs: v,v1=%Z,%Z\n", v, v1);
#endif
    r = addii(v,v1);
    lr = lb - lgefint(r);
    lbr = bfffo(*int_MSW(r));
    if (cmpii(r,bmax) > 0)	/* done, not found */
    {
      avma = av;
      return 0;
    }
    else if (lr > 1)		/* still more than a word's worth to go */
    {
      vmax = MAXULONG;		/* (cannot in fact happen) */
    }
    else			/* take difference of bit lengths */
    {
      lr = (lr << TWOPOTBITS_IN_LONG) - lbb + lbr;
      if ((ulong)lr > BITS_IN_LONG)
	vmax = MAXULONG;
      else if (lr == 0)
	vmax = 1UL;
      else
	vmax = 1UL << (lr-1);	/* as above */
    }
#ifdef DEBUG_RATLIFT
    fprintferr("rl-fs: vmax=%lu\n", vmax);
#endif
    /* single-word "Lehmer", discarding the gcd or whatever it returns */
    (void)rgcduu((ulong)*int_MSW(d), (ulong)*int_MSW(d1), vmax, &xu, &xu1, &xv, &xv1, &s0);
#ifdef DEBUG_RATLIFT
    fprintferr("rl-fs: [%lu,%lu; %lu,%lu] %s\n",
	       xu, xu1, xv, xv1,
	       s0 < 0 ? "-" : "+");
#endif
    if (xv1 == 1)		/* avoid multiplications */
    {
      /* re-use v+v1 computed above */
      v=v1; v1=r;
      r = subii(d,d1); d=d1; d1=r;
      s = -s;
    }
    else if (xu == 0)		/* and xv==1, xu1==1, xv1 > 1 */
    {
      r = subii(d, mului(xv1,d1)); d=d1; d1=r;
      r = addii(v, mului(xv1,v1)); v=v1; v1=r;
      s = -s;
    }
    else
    {
      r  = subii(muliu(d,xu),  muliu(d1,xv));
      d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
      r  = addii(muliu(v,xu),  muliu(v1,xv));
      v1 = addii(muliu(v,xu1), muliu(v1,xv1)); v = r;
      if (s0 < 0)
      {
	setsigne(d,-signe(d));
	s = -s;
      }
      else if (signe(d1))		/* sic: might vanish now */
      {
	setsigne(d1,-signe(d1));
      }
    }
    /* check whether we're done, as above.  Assert v <= bmax.  Examine v1:
     * if v1 > bmax, check d and return 0 or 1 depending on the outcome;
     * if v1 <= bmax, check d1 and return 1 if d1 <= amax, otherwise proceed.
     */
    if (cmpii(v1,bmax) > 0) /* certainly done */
    {
      avma = av;
      if (cmpii(d,amax) <= 0) /* done, found */
      {
	if (a != NULL)
	{
	  *a = icopy(d);
	  setsigne(*a,-s);	/* sign opposite to s */
	}
	if (b != NULL) *b = icopy(v);
	return 1;
      }
      else			/* done, not found */
	return 0;
    }
    else if (cmpii(d1,amax) <= 0) /* also done, found */
    {
      avma = av;
      if (a != NULL)
      {
	if (signe(d1))
	{
	  *a = icopy(d1);
	  setsigne(*a,s);	/* same sign as s */
	}
	else
	  *a = gen_0;
      }
      if (b != NULL) *b = icopy(v1);
      return 1;
    }
  } /* while */

  /* get here when we have run into d1 == 0 before returning... in fact,
   * this cannot happen.
   */
  pari_err(talker, "ratlift failed to catch d1 == 0\n");
  /* NOTREACHED */
  return 0;
}
Exemplo n.º 4
0
static GEN
nf_LLL_cmbf(nfcmbf_t *T, GEN p, long k, long rec)
{
  nflift_t *L = T->L;
  GEN pk = L->pk, PRK = L->prk, PRKinv = L->iprk, GSmin = L->GSmin;
  GEN Tpk = L->Tpk;

  GEN famod = T->fact, nf = T->nf, ZC = T->ZC, Br = T->Br;
  GEN Pbase = T->polbase, P = T->pol, dn = T->dn;
  GEN nfT = gel(nf,1);
  GEN Btra;
  long dnf = degpol(nfT), dP = degpol(P);

  double BitPerFactor = 0.5; /* nb bits / modular factor */
  long i, C, tmax, n0;
  GEN lP, Bnorm, Tra, T2, TT, CM_L, m, list, ZERO;
  double Bhigh;
  pari_sp av, av2, lim;
  long ti_LLL = 0, ti_CF = 0;
  pari_timer ti2, TI;

  lP = absi(leading_term(P));
  if (is_pm1(lP)) lP = NULL;

  n0 = lg(famod) - 1;
 /* Lattice: (S PRK), small vector (vS vP). To find k bound for the image,
  * write S = S1 q + S0, P = P1 q + P0
  * |S1 vS + P1 vP|^2 <= Bhigh for all (vS,vP) assoc. to true factors */
  Btra = mulrr(ZC, mulsr(dP*dP, normlp(Br, 2, dnf)));
  Bhigh = get_Bhigh(n0, dnf);
  C = (long)ceil(sqrt(Bhigh/n0)) + 1; /* C^2 n0 ~ Bhigh */
  Bnorm = dbltor( n0 * C * C + Bhigh );
  ZERO = zeromat(n0, dnf);

  av = avma; lim = stack_lim(av, 1);
  TT = cgetg(n0+1, t_VEC);
  Tra  = cgetg(n0+1, t_MAT);
  for (i=1; i<=n0; i++) TT[i] = 0;
  CM_L = gscalsmat(C, n0);
  /* tmax = current number of traces used (and computed so far) */
  for(tmax = 0;; tmax++)
  {
    long a, b, bmin, bgood, delta, tnew = tmax + 1, r = lg(CM_L)-1;
    GEN oldCM_L, M_L, q, S1, P1, VV;
    int first = 1;

    /* bound for f . S_k(genuine factor) = ZC * bound for T_2(S_tnew) */
    Btra = mulrr(ZC, mulsr(dP*dP, normlp(Br, 2*tnew, dnf)));
    bmin = logint(ceil_safe(sqrtr(Btra)), gen_2, NULL);
    if (DEBUGLEVEL>2)
      fprintferr("\nLLL_cmbf: %ld potential factors (tmax = %ld, bmin = %ld)\n",
                 r, tmax, bmin);

    /* compute Newton sums (possibly relifting first) */
    if (gcmp(GSmin, Btra) < 0)
    {
      nflift_t L1;
      GEN polred;

      bestlift_init(k<<1, nf, T->pr, Btra, &L1);
      polred = ZqX_normalize(Pbase, lP, &L1);
      k      = L1.k;
      pk     = L1.pk;
      PRK    = L1.prk;
      PRKinv = L1.iprk;
      GSmin  = L1.GSmin;
      Tpk    = L1.Tpk;
      famod = hensel_lift_fact(polred, famod, Tpk, p, pk, k);
      for (i=1; i<=n0; i++) TT[i] = 0;
    }
    for (i=1; i<=n0; i++)
    {
      GEN h, lPpow = lP? gpowgs(lP, tnew): NULL;
      GEN z = polsym_gen(gel(famod,i), gel(TT,i), tnew, Tpk, pk);
      gel(TT,i) = z;
      h = gel(z,tnew+1);
      /* make Newton sums integral */
      lPpow = mul_content(lPpow, dn);
      if (lPpow) h = FpX_red(gmul(h,lPpow), pk);
      gel(Tra,i) = nf_bestlift(h, NULL, L); /* S_tnew(famod) */
    }

    /* compute truncation parameter */
    if (DEBUGLEVEL>2) { TIMERstart(&ti2); TIMERstart(&TI); }
    oldCM_L = CM_L;
    av2 = avma;
    b = delta = 0; /* -Wall */
AGAIN:
    M_L = Q_div_to_int(CM_L, utoipos(C));
    VV = get_V(Tra, M_L, PRK, PRKinv, pk, &a);
    if (first)
    { /* initialize lattice, using few p-adic digits for traces */
      bgood = (long)(a - max(32, BitPerFactor * r));
      b = max(bmin, bgood);
      delta = a - b;
    }
    else
    { /* add more p-adic digits and continue reduction */
      if (a < b) b = a;
      b = max(b-delta, bmin);
      if (b - delta/2 < bmin) b = bmin; /* near there. Go all the way */
    }

    /* restart with truncated entries */
    q = int2n(b);
    P1 = gdivround(PRK, q);
    S1 = gdivround(Tra, q);
    T2 = gsub(gmul(S1, M_L), gmul(P1, VV));
    m = vconcat( CM_L, T2 );
    if (first)
    {
      first = 0;
      m = shallowconcat( m, vconcat(ZERO, P1) );
      /*     [ C M_L   0  ]
       * m = [            ]   square matrix
       *     [  T2'   PRK ]   T2' = Tra * M_L  truncated
       */
    }
    CM_L = LLL_check_progress(Bnorm, n0, m, b == bmin, /*dbg:*/ &ti_LLL);
    if (DEBUGLEVEL>2)
      fprintferr("LLL_cmbf: (a,b) =%4ld,%4ld; r =%3ld -->%3ld, time = %ld\n",
                 a,b, lg(m)-1, CM_L? lg(CM_L)-1: 1, TIMER(&TI));
    if (!CM_L) { list = mkcol(QXQX_normalize(P,nfT)); break; }
    if (b > bmin)
    {
      CM_L = gerepilecopy(av2, CM_L);
      goto AGAIN;
    }
    if (DEBUGLEVEL>2) msgTIMER(&ti2, "for this trace");

    i = lg(CM_L) - 1;
    if (i == r && gequal(CM_L, oldCM_L))
    {
      CM_L = oldCM_L;
      avma = av2; continue;
    }

    if (i <= r && i*rec < n0)
    {
      pari_timer ti;
      if (DEBUGLEVEL>2) TIMERstart(&ti);
      list = nf_chk_factors(T, P, Q_div_to_int(CM_L,utoipos(C)), famod, pk);
      if (DEBUGLEVEL>2) ti_CF += TIMER(&ti);
      if (list) break;
      CM_L = gerepilecopy(av2, CM_L);
    }
    if (low_stack(lim, stack_lim(av,1)))
    {
      if(DEBUGMEM>1) pari_warn(warnmem,"nf_LLL_cmbf");
      gerepileall(av, Tpk? 9: 8,
                      &CM_L,&TT,&Tra,&famod,&pk,&GSmin,&PRK,&PRKinv,&Tpk);
    }
  }
  if (DEBUGLEVEL>2)
    fprintferr("* Time LLL: %ld\n* Time Check Factor: %ld\n",ti_LLL,ti_CF);
  return list;
}
Exemplo n.º 5
0
int
invmod(GEN a, GEN b, GEN *res)
#endif
{
  GEN v,v1,d,d1,q,r;
  pari_sp av, av1, lim;
  long s;
  ulong g;
  ulong xu,xu1,xv,xv1;		/* Lehmer stage recurrence matrix */
  int lhmres;			/* Lehmer stage return value */

  if (typ(a) != t_INT || typ(b) != t_INT) pari_err(arither1);
  if (!signe(b)) { *res=absi(a); return 0; }
  av = avma;
  if (lgefint(b) == 3) /* single-word affair */
  {
    ulong d1 = umodiu(a, (ulong)(b[2]));
    if (d1 == 0)
    {
      if (b[2] == 1L)
        { *res = gen_0; return 1; }
      else
        { *res = absi(b); return 0; }
    }
    g = xgcduu((ulong)(b[2]), d1, 1, &xv, &xv1, &s);
#ifdef DEBUG_LEHMER
    fprintferr(" <- %lu,%lu\n", (ulong)(b[2]), (ulong)(d1[2]));
    fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
#endif
    avma = av;
    if (g != 1UL) { *res = utoipos(g); return 0; }
    xv = xv1 % (ulong)(b[2]); if (s < 0) xv = ((ulong)(b[2])) - xv;
    *res = utoipos(xv); return 1;
  }

  (void)new_chunk(lgefint(b));
  d = absi(b); d1 = modii(a,d);

  v=gen_0; v1=gen_1;	/* general case */
#ifdef DEBUG_LEHMER
  fprintferr("INVERT: -------------------------\n");
  output(d1);
#endif
  av1 = avma; lim = stack_lim(av,1);

  while (lgefint(d) > 3 && signe(d1))
  {
#ifdef DEBUG_LEHMER
    fprintferr("Calling Lehmer:\n");
#endif
    lhmres = lgcdii((ulong*)d, (ulong*)d1, &xu, &xu1, &xv, &xv1, MAXULONG);
    if (lhmres != 0)		/* check progress */
    {				/* apply matrix */
#ifdef DEBUG_LEHMER
      fprintferr("Lehmer returned %d [%lu,%lu;%lu,%lu].\n",
	      lhmres, xu, xu1, xv, xv1);
#endif
      if ((lhmres == 1) || (lhmres == -1))
      {
	if (xv1 == 1)
	{
	  r = subii(d,d1); d=d1; d1=r;
	  a = subii(v,v1); v=v1; v1=a;
	}
	else
	{
	  r = subii(d, mului(xv1,d1)); d=d1; d1=r;
	  a = subii(v, mului(xv1,v1)); v=v1; v1=a;
	}
      }
      else
      {
	r  = subii(muliu(d,xu),  muliu(d1,xv));
	a  = subii(muliu(v,xu),  muliu(v1,xv));
	d1 = subii(muliu(d,xu1), muliu(d1,xv1)); d = r;
	v1 = subii(muliu(v,xu1), muliu(v1,xv1)); v = a;
        if (lhmres&1)
	{
          setsigne(d,-signe(d));
          setsigne(v,-signe(v));
        }
        else
	{
          if (signe(d1)) { setsigne(d1,-signe(d1)); }
          setsigne(v1,-signe(v1));
        }
      }
    }
#ifdef DEBUG_LEHMER
    else
      fprintferr("Lehmer returned 0.\n");
    output(d); output(d1); output(v); output(v1);
    sleep(1);
#endif

    if (lhmres <= 0 && signe(d1))
    {
      q = dvmdii(d,d1,&r);
#ifdef DEBUG_LEHMER
      fprintferr("Full division:\n");
      printf("  q = "); output(q); sleep (1);
#endif
      a = subii(v,mulii(q,v1));
      v=v1; v1=a;
      d=d1; d1=r;
    }
    if (low_stack(lim, stack_lim(av,1)))
    {
      GEN *gptr[4]; gptr[0]=&d; gptr[1]=&d1; gptr[2]=&v; gptr[3]=&v1;
      if(DEBUGMEM>1) pari_warn(warnmem,"invmod");
      gerepilemany(av1,gptr,4);
    }
  } /* end while */

  /* Postprocessing - final sprint */
  if (signe(d1))
  {
    /* Assertions: lgefint(d)==lgefint(d1)==3, and
     * gcd(d,d1) is nonzero and fits into one word
     */
    g = xxgcduu((ulong)d[2], (ulong)d1[2], 1, &xu, &xu1, &xv, &xv1, &s);
#ifdef DEBUG_LEHMER
    output(d);output(d1);output(v);output(v1);
    fprintferr(" <- %lu,%lu\n", (ulong)d[2], (ulong)d1[2]);
    fprintferr(" -> %lu,%ld,%lu; %lx\n", g,s,xv1,avma);
#endif
    if (g != 1UL) { avma = av; *res = utoipos(g); return 0; }
    /* (From the xgcduu() blurb:)
     * For finishing the multiword modinv, we now have to multiply the
     * returned matrix  (with properly adjusted signs)  onto the values
     * v' and v1' previously obtained from the multiword division steps.
     * Actually, it is sufficient to take the scalar product of [v',v1']
     * with [u1,-v1], and change the sign if s==1.
     */
    v = subii(muliu(v,xu1),muliu(v1,xv1));
    if (s > 0) setsigne(v,-signe(v));
    avma = av; *res = modii(v,b);
#ifdef DEBUG_LEHMER
    output(*res); fprintfderr("============================Done.\n");
    sleep(1);
#endif
    return 1;
  }
  /* get here when the final sprint was skipped (d1 was zero already) */
  avma = av;
  if (!equalii(d,gen_1)) { *res = icopy(d); return 0; }
  *res = modii(v,b);
#ifdef DEBUG_LEHMER
  output(*res); fprintferr("============================Done.\n");
  sleep(1);
#endif
  return 1;
}