/* Bound for T_2(P), where P divides polbase in C[X].
 * Mignotte bound: if A | S then
 *   |a_i| <= binom(d-1, i-1) || S ||_2 + binom(d-1, i) lc(S)
 * The inequality is applied to sigma(S) for every embedding sigma, the L_2
 * norm over sigma is taken, and finally the sup over i.
 *
 * nf      : number field structure (gmael(nf,5,2) supplies the matrix G)
 * polbase : polynomial whose coefficients are fed to arch_for_T2
 * Returns the largest of the candidate bounds (i = 0, i = d, 0 < i < d). */
static GEN
nf_Mignotte_bound(GEN nf, GEN polbase)
{
  GEN G = gmael(nf,5,2), lS = leading_term(polbase); /* lS is a t_INT */
  GEN tmp, best, norms, embS, binlS, bin;
  long prec, i, j;
  long d = degpol(polbase), n = degpol(nf[1]), r1 = nf_get_r1(nf);

  bin = vecbinome(d-1);
  /* binlS = lc(S) * bin; the multiplication is skipped when lc(S) = 1 */
  binlS = gcmp1(lS)? bin: gmul(lS, bin);

  norms = cgetg(n+1, t_VEC);
  prec = gprecision(G);
  for (;;)
  {
    nffp_t F;
    int lowprec = 0; /* set as soon as one norm comes out too short */

    embS = cgetg(d+2, t_MAT);
    for (j=0; j<=d; j++)
      gel(embS,j+1) = arch_for_T2(G, gel(polbase,j+2));
    embS = shallowtrans(embS);

    /* first r1 rows: norms[j] = || sigma_j(S) ||_2 */
    for (j=1; j <= r1; j++)
    {
      GEN q = QuickNormL2(gel(embS,j), DEFAULTPREC);
      gel(norms,j) = gsqrt(q, DEFAULTPREC);
      if (lg(norms[j]) < DEFAULTPREC) { lowprec = 1; break; }
    }
    /* remaining rows are taken two at a time; both slots receive
     * sqrt((q1 + q2) / 2) */
    if (!lowprec)
      for ( ; j <= n; j+=2)
      {
        GEN q1 = QuickNormL2(gel(embS,j  ), DEFAULTPREC);
        GEN q2 = QuickNormL2(gel(embS,j+1), DEFAULTPREC);
        tmp = gmul2n(mpadd(q1, q2), -1);
        gel(norms,j) = gel(norms,j+1) = gsqrt(tmp, DEFAULTPREC);
        if (lg(norms[j]) < DEFAULTPREC) { lowprec = 1; break; }
      }
    if (!lowprec) break; /* done */

    /* precision problem: rebuild G with more words and start over */
    prec = (prec<<1)-2;
    remake_GM(nf, &F, prec); G = F.G;
    if (DEBUGLEVEL>1) pari_warn(warnprec, "nf_factor_bound", prec);
  }

  /* Take sup over 0 <= i <= d of
   * sum_sigma | binom(d-1, i-1) ||sigma(S)||_2 + binom(d-1,i) lc(S) |^2 */

  /* i = 0: n lc(S)^2 */
  best = mulsi(n, sqri(lS));
  /* i = d: sum_sigma ||sigma(S)||_2^2 */
  tmp = gnorml2(norms);
  if (gcmp(best, tmp) < 0) best = tmp;
  /* 0 < i < d */
  for (i = 1; i < d; i++)
  {
    GEN s = gen_0;
    for (j = 1; j <= n; j++)
    {
      tmp = mpadd( mpmul(gel(bin,i), gel(norms,j)), gel(binlS,i+1) );
      s = mpadd(s, gsqr(tmp));
    }
    if (gcmp(best, s) < 0) best = s;
  }
  return best;
}
/* Fill *L with the data needed to lift modulo pr^a.
 * a  : starting exponent; when 0 it is derived from bestlift_bound()
 * nf : number field, pr : prime ideal
 * C  : target; a is doubled until the computed GSmin satisfies GSmin >= C
 * L  : output; receives k, den, pk, prk, iprk, GSmin, prkHNF, and whatever
 *      init_proj() sets up */
static void
bestlift_init(long a, GEN nf, GEN pr, GEN C, nflift_t *L)
{
  const long D = 100;
  const double alpha = ((double)D-1) / D; /* LLL parameter */
  const long d = degpol(nf[1]);
  pari_sp av = avma;
  GEN prk, PRK, B, GSmin, pk;
  pari_timer ti;

  TIMERstart(&ti);
  if (a == 0) a = (long)bestlift_bound(C, d, alpha, pr_norm(pr));

  for (;;)
  {
    if (DEBUGLEVEL>2) fprintferr("exponent: %ld\n",a);
    prk = idealpows(nf, pr, a);
    pk = gcoeff(prk,1,1);
    /* reduce size first, "scramble" matrix */
    PRK = lllintpartial_ip(prk);
    /* now floating point reduction is fast */
    PRK = lllint_fp_ip(PRK, 4);
    PRK = lllint_i(PRK, D, 0, NULL, NULL, &B);
    if (PRK)
    {
      pari_sp av2 = avma;
      GEN Rinv = invmat( get_R(PRK) ), norms = GS_norms(B, DEFAULTPREC);
      GEN smax = gen_0;
      long row, col;
      /* smax = max over rows of sum_col Rinv[row,col]^2 / norms[col] */
      for (row=1; row<=d; row++)
      {
        GEN s = gen_0;
        for (col=1; col<=d; col++)
          s = gadd(s, gdiv( gsqr(gcoeff(Rinv,row,col)), gel(norms,col)));
        if (gcmp(s, smax) > 0) smax = s;
      }
      /* GSmin = 1 / (4 smax) */
      GSmin = gerepileupto(av2, ginv(gmul2n(smax, 2)));
    }
    else
    { /* nf = Q */
      PRK = prk;
      GSmin = pk;
    }
    if (gcmp(GSmin, C) >= 0) break;

    /* not enough: drop this attempt from the stack and double the exponent */
    avma = av;
    a <<= 1;
  }
  if (DEBUGLEVEL>2)
    fprintferr("for this exponent, GSmin = %Z\nTime reduction: %ld\n",
               GSmin, TIMER(&ti));

  L->k = a;
  L->den = L->pk = pk;
  L->prk = PRK;
  L->iprk = ZM_inv(PRK, pk);
  L->GSmin= GSmin;
  L->prkHNF = prk;
  init_proj(L, gel(nf,1), gel(pr,1));
}
/* Coefficient bounds built from the absolute values of the entries of rr.
 * m       : the result has m+1 entries; entries 1 and 2 are set to 0 (unused)
 * rr      : vector; its leading run of t_REAL entries is counted once in the
 *           product below, every later entry is counted squared
 * maxroot : output, largest absolute value found in rr
 * Returns M with M[i+2] = ceil_safe(binom(m-1,i) * B + binom(m-1,i-1)) for
 * 0 < i < m, where B is the product described above after each absolute
 * value has been raised to at least 1. */
static GEN
bound_for_coeff(long m, GEN rr, GEN *maxroot)
{
  long idx, nreal, len = lg(rr);
  GEN term, prodreal, prodrest, mahler, M, pascal = matpascal(m-1);

  /* nreal = number of leading entries of type t_REAL */
  nreal = 0;
  while (nreal+1 < len && typ(rr[nreal+1]) == t_REAL) nreal++;

  rr = gabs(rr,0);
  *maxroot = vecmax(rr);
  /* clamp every modulus from below by 1 */
  for (idx=1; idx<len; idx++)
  {
    if (gcmp(gel(rr,idx), gen_1) < 0) gel(rr,idx) = gen_1;
  }

  prodreal = gen_1;
  for (idx=1; idx<=nreal; idx++) prodreal = gmul(prodreal, gel(rr,idx));
  prodrest = gen_1;
  for (idx=nreal+1; idx<len; idx++) prodrest = gmul(prodrest, gel(rr,idx));
  mahler = gmul(prodreal, gsqr(prodrest)); /* Mahler measure */

  M = cgetg(m+2, t_VEC);
  gel(M,2) = gen_0; /* unused */
  gel(M,1) = gen_0; /* unused */
  for (idx=1; idx<m; idx++)
  {
    GEN hi = gcoeff(pascal, m, idx+1); /* binom(m-1, idx)   */
    GEN lo = gcoeff(pascal, m, idx);   /* binom(m-1, idx-1) */
    term = gadd(gmul(hi, mahler), lo);
    gel(M,idx+2) = ceil_safe(term);
  }
  return M;
}
/* Return a minimal lift of elt modulo id, using the lattice data in L.
 * elt   : t_INT, t_POL (converted through L->tozk first) or anything gmul
 *         can multiply L->iprk with
 * bound : optional; when non-NULL and the squared L2 norm of the lift
 *         (QuickNormL2) exceeds it, NULL is returned instead
 * L     : lifting data (prk, iprk, den, tozk are read) */
static GEN
nf_bestlift(GEN elt, GEN bound, nflift_t *L)
{
  GEN u;
  long i, l = lg(L->prk);
  const int is_int = (typ(elt) == t_INT);

  if (is_int)
    u = gmul(elt, gel(L->iprk,1));
  else
  {
    if (typ(elt) == t_POL) elt = mulmat_pol(L->tozk, elt);
    u = gmul(L->iprk,elt);
  }
  /* round each coordinate of u / den to the nearest integer */
  for (i=1; i<l; i++)
    gel(u,i) = diviiround(gel(u,i), L->den);
  /* an integer is turned into a scalar column before the subtraction */
  if (is_int) elt = gscalcol(elt, l-1);

  u = gsub(elt, gmul(L->prk, u));
  if (!bound) return u;
  return (gcmp(QuickNormL2(u,DEFAULTPREC), bound) > 0)? NULL: u;
}
/* Recombine the modular factors T->fact of T->pol by lattice reduction on
 * successive Newton sums ("traces") of those factors.
 * T   : recombination context; the fields L, fact, nf, ZC, Br, polbase, pol,
 *       dn and pr are read here
 * p   : only passed through to hensel_lift_fact when relifting
 * k   : current lifting exponent; bestlift_init is called with k<<1 whenever
 *       GSmin is smaller than the bound Btra needed for the next trace
 * rec : nf_chk_factors is attempted only while i*rec < n0, where i is the
 *       number of columns left in CM_L and n0 the number of modular factors
 * Returns the list given by nf_chk_factors, or the one-element column
 * mkcol(QXQX_normalize(P,nfT)) when LLL_check_progress returns NULL. */
static GEN
nf_LLL_cmbf(nfcmbf_t *T, GEN p, long k, long rec)
{
  nflift_t *L = T->L;
  GEN pk = L->pk, PRK = L->prk, PRKinv = L->iprk, GSmin = L->GSmin;
  GEN Tpk = L->Tpk;
  GEN famod = T->fact, nf = T->nf, ZC = T->ZC, Br = T->Br;
  GEN Pbase = T->polbase, P = T->pol, dn = T->dn;
  GEN nfT = gel(nf,1);
  GEN Btra;
  long dnf = degpol(nfT), dP = degpol(P);
  double BitPerFactor = 0.5; /* nb bits / modular factor */
  long i, C, tmax, n0;
  GEN lP, Bnorm, Tra, T2, TT, CM_L, m, list, ZERO;
  double Bhigh;
  pari_sp av, av2, lim;
  long ti_LLL = 0, ti_CF = 0;
  pari_timer ti2, TI;

  /* lP = |lc(P)|, or NULL when it is 1 so that later multiplications by it
   * are skipped */
  lP = absi(leading_term(P));
  if (is_pm1(lP)) lP = NULL;
  n0 = lg(famod) - 1; /* number of modular factors */
  /* Lattice: (S PRK), small vector (vS vP). To find k bound for the image,
   * write S = S1 q + S0, P = P1 q + P0
   * |S1 vS + P1 vP|^2 <= Bhigh for all (vS,vP) assoc. to true factors */
  /* NOTE(review): this first Btra is overwritten at the top of the loop
   * below before anything reads it */
  Btra = mulrr(ZC, mulsr(dP*dP, normlp(Br, 2, dnf)));
  Bhigh = get_Bhigh(n0, dnf);
  C = (long)ceil(sqrt(Bhigh/n0)) + 1; /* C^2 n0 ~ Bhigh */
  Bnorm = dbltor( n0 * C * C + Bhigh );
  ZERO = zeromat(n0, dnf);

  /* av is the checkpoint used by the low_stack garbage collection at the
   * bottom of the main loop */
  av = avma; lim = stack_lim(av, 1);
  TT = cgetg(n0+1, t_VEC);
  Tra = cgetg(n0+1, t_MAT);
  for (i=1; i<=n0; i++) TT[i] = 0;
  CM_L = gscalsmat(C, n0);
  /* tmax = current number of traces used (and computed so far) */
  for(tmax = 0;; tmax++)
  {
    long a, b, bmin, bgood, delta, tnew = tmax + 1, r = lg(CM_L)-1;
    GEN oldCM_L, M_L, q, S1, P1, VV;
    int first = 1;

    /* bound for f . S_k(genuine factor) = ZC * bound for T_2(S_tnew) */
    Btra = mulrr(ZC, mulsr(dP*dP, normlp(Br, 2*tnew, dnf)));
    bmin = logint(ceil_safe(sqrtr(Btra)), gen_2, NULL);
    if (DEBUGLEVEL>2)
      fprintferr("\nLLL_cmbf: %ld potential factors (tmax = %ld, bmin = %ld)\n",
                 r, tmax, bmin);

    /* compute Newton sums (possibly relifting first) */
    if (gcmp(GSmin, Btra) < 0)
    {
      nflift_t L1;
      GEN polred;

      bestlift_init(k<<1, nf, T->pr, Btra, &L1);
      polred = ZqX_normalize(Pbase, lP, &L1);
      k = L1.k;
      pk = L1.pk;
      PRK = L1.prk;
      PRKinv = L1.iprk;
      GSmin = L1.GSmin;
      Tpk = L1.Tpk;
      famod = hensel_lift_fact(polred, famod, Tpk, p, pk, k);
      /* cached Newton sums are discarded after a relift */
      for (i=1; i<=n0; i++) TT[i] = 0;
    }
    for (i=1; i<=n0; i++)
    {
      GEN h, lPpow = lP? gpowgs(lP, tnew): NULL;
      GEN z = polsym_gen(gel(famod,i), gel(TT,i), tnew, Tpk, pk);
      gel(TT,i) = z;
      h = gel(z,tnew+1);
      /* make Newton sums integral */
      lPpow = mul_content(lPpow, dn);
      if (lPpow) h = FpX_red(gmul(h,lPpow), pk);
      /* NOTE(review): after a relift the locals pk/PRK/PRKinv/Tpk come from
       * L1, yet this call still passes the original L -- confirm intended */
      gel(Tra,i) = nf_bestlift(h, NULL, L); /* S_tnew(famod) */
    }

    /* compute truncation parameter */
    if (DEBUGLEVEL>2) { TIMERstart(&ti2); TIMERstart(&TI); }
    oldCM_L = CM_L;
    /* av2 is the checkpoint for the per-trace gerepilecopy calls below */
    av2 = avma;
    b = delta = 0; /* -Wall */
AGAIN:
    M_L = Q_div_to_int(CM_L, utoipos(C));
    VV = get_V(Tra, M_L, PRK, PRKinv, pk, &a);
    if (first)
    { /* initialize lattice, using few p-adic digits for traces */
      bgood = (long)(a - max(32, BitPerFactor * r));
      b = max(bmin, bgood);
      delta = a - b;
    }
    else
    { /* add more p-adic digits and continue reduction */
      if (a < b) b = a;
      b = max(b-delta, bmin);
      if (b - delta/2 < bmin) b = bmin; /* near there. Go all the way */
    }

    /* restart with truncated entries */
    q = int2n(b);
    P1 = gdivround(PRK, q);
    S1 = gdivround(Tra, q);
    T2 = gsub(gmul(S1, M_L), gmul(P1, VV));
    m = vconcat( CM_L, T2 );
    if (first)
    {
      first = 0;
      m = shallowconcat( m, vconcat(ZERO, P1) );
      /*     [ C M_L   0  ]
       * m = [            ]   square matrix
       *     [  T2'   PRK ]   T2' = Tra * M_L  truncated
       */
    }
    CM_L = LLL_check_progress(Bnorm, n0, m, b == bmin, /*dbg:*/ &ti_LLL);
    if (DEBUGLEVEL>2)
      fprintferr("LLL_cmbf: (a,b) =%4ld,%4ld; r =%3ld -->%3ld, time = %ld\n",
                 a,b, lg(m)-1, CM_L? lg(CM_L)-1: 1, TIMER(&TI));
    /* NULL from LLL_check_progress: P itself is returned as the only entry */
    if (!CM_L) { list = mkcol(QXQX_normalize(P,nfT)); break; }
    /* truncation not yet down to bmin: keep CM_L and refine further */
    if (b > bmin)
    {
      CM_L = gerepilecopy(av2, CM_L);
      goto AGAIN;
    }
    if (DEBUGLEVEL>2) msgTIMER(&ti2, "for this trace");

    i = lg(CM_L) - 1;
    /* nothing changed for this trace: restore the stack and try the next */
    if (i == r && gequal(CM_L, oldCM_L))
    {
      CM_L = oldCM_L;
      avma = av2;
      continue;
    }

    if (i <= r && i*rec < n0)
    {
      pari_timer ti;
      if (DEBUGLEVEL>2) TIMERstart(&ti);
      list = nf_chk_factors(T, P, Q_div_to_int(CM_L,utoipos(C)), famod, pk);
      if (DEBUGLEVEL>2) ti_CF += TIMER(&ti);
      if (list) break;
      CM_L = gerepilecopy(av2, CM_L);
    }
    if (low_stack(lim, stack_lim(av,1)))
    {
      if(DEBUGMEM>1) pari_warn(warnmem,"nf_LLL_cmbf");
      /* Tpk is last in the list so it can be left out when it is NULL */
      gerepileall(av, Tpk? 9: 8,
                  &CM_L,&TT,&Tra,&famod,&pk,&GSmin,&PRK,&PRKinv,&Tpk);
    }
  }
  if (DEBUGLEVEL>2)
    fprintferr("* Time LLL: %ld\n* Time Check Factor: %ld\n",ti_LLL,ti_CF);
  return list;
}
/*
 * move the 32-bit words a and b into two freshly allocated
 * registers, emit a CMP of the two, and free the registers.
 */
static void
cmp64word(Node *a, Node *b)
{
	Node ra, rb;

	regalloc(&ra, types[TINT32], N);
	regalloc(&rb, types[TINT32], N);
	gins(AMOVW, a, &ra);
	gins(AMOVW, b, &rb);
	gcmp(ACMP, &ra, &rb);
	regfree(&ra);
	regfree(&rb);
}

/*
 * generate comparison of nl, nr, both 64-bit.
 * nl is memory; nr is constant or memory.
 * control is transferred to `to' when `nl op nr' holds.
 */
void
cmp64(Node *nl, Node *nr, int op, Prog *to)
{
	Node lo1, hi1, lo2, hi2;
	Prog *skip;
	Type *wt;

	split64(nl, &lo1, &hi1);
	split64(nr, &lo2, &hi2);

	// high words first: when they differ the
	// outcome is already known.
	wt = hi1.type;
	cmp64word(&hi1, &hi2);

	// skip, when set, is a branch over the low-word test.
	skip = P;
	switch(op) {
	default:
		fatal("cmp64 %O %T", op, wt);
	case OEQ:
		//	cmp hi
		//	bne L
		//	cmp lo
		//	beq to
		// L:
		skip = gbranch(ABNE, T);
		break;
	case ONE:
		//	cmp hi
		//	bne to
		//	cmp lo
		//	bne to
		patch(gbranch(ABNE, T), to);
		break;
	case OGE:
	case OGT:
		//	cmp hi
		//	bgt to
		//	blt L
		//	cmp lo
		//	bge to (or bgt to)
		// L:
		patch(gbranch(optoas(OGT, wt), T), to);
		skip = gbranch(optoas(OLT, wt), T);
		break;
	case OLE:
	case OLT:
		//	cmp hi
		//	blt to
		//	bgt L
		//	cmp lo
		//	ble to (or blt to)
		// L:
		patch(gbranch(optoas(OLT, wt), T), to);
		skip = gbranch(optoas(OGT, wt), T);
		break;
	}

	// low words, then the branch for op itself.
	wt = lo1.type;
	cmp64word(&lo1, &lo2);
	patch(gbranch(optoas(op, wt), T), to);

	// land the skipping branch, if any, right here.
	if(skip != P)
		patch(skip, pc);

	splitclean();
	splitclean();
}
/*
 * generate 64-bit
 *	res = n
 * n->op must be one of OMINUS, OCOM, OADD, OSUB, OMUL, OLSH, ORSH,
 * OAND, OOR, OXOR; anything else is reported through fatal().
 * res must be an OINDREG or ONAME node.  nothing is returned.
 *
 * unary operators store straight into res and return; binary
 * operators leave their result in the register pair ah:al, which is
 * stored into res at the bottom of the function.
 */
void
cgen64(Node *n, Node *res)
{
	Node t1, t2, *l, *r;
	Node lo1, lo2, hi1, hi2;
	Node al, ah, bl, bh, cl, ch, s, n1, creg;
	Prog *p1, *p2, *p3, *p4, *p5, *p6;
	uint64 v;

	if(res->op != OINDREG && res->op != ONAME) {
		dump("n", n);
		dump("res", res);
		fatal("cgen64 %O of %O", n->op, res->op);
	}

	// evaluate a non-addressable left operand into a temporary.
	l = n->left;
	if(!l->addable) {
		tempname(&t1, l->type);
		cgen(l, &t1);
		l = &t1;
	}

	split64(l, &lo1, &hi1);
	switch(n->op) {
	default:
		fatal("cgen64 %O", n->op);

	case OMINUS:
		// res = 0 - l, as SUB.S on the low word then SBC on the high.
		split64(res, &lo2, &hi2);

		regalloc(&t1, lo1.type, N);
		regalloc(&al, lo1.type, N);
		regalloc(&ah, hi1.type, N);

		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);

		gmove(ncon(0), &t1);
		p1 = gins(ASUB, &al, &t1);
		p1->scond |= C_SBIT;
		gins(AMOVW, &t1, &lo2);

		gmove(ncon(0), &t1);
		gins(ASBC, &ah, &t1);
		gins(AMOVW, &t1, &hi2);

		regfree(&t1);
		regfree(&al);
		regfree(&ah);
		splitclean();
		splitclean();
		return;

	case OCOM:
		// res = ~l, one MVN per word through a single scratch register.
		split64(res, &lo2, &hi2);
		regalloc(&n1, lo1.type, N);

		gins(AMOVW, &lo1, &n1);
		gins(AMVN, &n1, &n1);
		gins(AMOVW, &n1, &lo2);

		gins(AMOVW, &hi1, &n1);
		gins(AMVN, &n1, &n1);
		gins(AMOVW, &n1, &hi2);

		regfree(&n1);
		splitclean();
		splitclean();
		return;

	case OADD:
	case OSUB:
	case OMUL:
	case OLSH:
	case ORSH:
	case OAND:
	case OOR:
	case OXOR:
		// binary operators.
		// common setup below.
		break;
	}

	// setup for binary operators
	r = n->right;
	if(r != N && !r->addable) {
		tempname(&t2, r->type);
		cgen(r, &t2);
		r = &t2;
	}
	if(is64(r->type))
		split64(r, &lo2, &hi2);

	regalloc(&al, lo1.type, N);
	regalloc(&ah, hi1.type, N);

	// Do op.  Leave result in ah:al.
	switch(n->op) {
	default:
		fatal("cgen64: not implemented: %N\n", n);

	case OADD:
		// TODO: Constants
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi2, &bh);
		gins(AMOVW, &lo2, &bl);
		// ADD.S low words, then ADC high words.
		p1 = gins(AADD, &bl, &al);
		p1->scond |= C_SBIT;
		gins(AADC, &bh, &ah);
		regfree(&bl);
		regfree(&bh);
		break;

	case OSUB:
		// TODO: Constants.
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo2, &bl);
		gins(AMOVW, &hi2, &bh);
		// SUB.S low words, then SBC high words.
		p1 = gins(ASUB, &bl, &al);
		p1->scond |= C_SBIT;
		gins(ASBC, &bh, &ah);
		regfree(&bl);
		regfree(&bh);
		break;

	case OMUL:
		// TODO(kaib): this can be done with 4 regs and does not need 6
		regalloc(&bl, types[TPTR32], N);
		regalloc(&bh, types[TPTR32], N);
		regalloc(&cl, types[TPTR32], N);
		regalloc(&ch, types[TPTR32], N);

		// load args into bh:bl and ch:cl.
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);
		gins(AMOVW, &hi2, &ch);
		gins(AMOVW, &lo2, &cl);

		// bl * cl
		p1 = gins(AMULLU, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bl.val.u.reg;
		p1->reg = cl.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = al.val.u.reg;
//print("%P\n", p1);

		// bl * ch
		p1 = gins(AMULA, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bl.val.u.reg;
		p1->reg = ch.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);

		// bh * cl
		p1 = gins(AMULA, N, N);
		p1->from.type = D_REG;
		p1->from.reg = bh.val.u.reg;
		p1->reg = cl.val.u.reg;
		p1->to.type = D_REGREG;
		p1->to.reg = ah.val.u.reg;
		p1->to.offset = ah.val.u.reg;
//print("%P\n", p1);

		regfree(&bh);
		regfree(&bl);
		regfree(&ch);
		regfree(&cl);
		break;

	case OLSH:
		regalloc(&bl, lo1.type, N);
		regalloc(&bh, hi1.type, N);
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);

		// constant shift count: pick the instruction sequence now.
		if(r->op == OLITERAL) {
			v = mpgetfix(r->val.u.xval);
			if(v >= 64) {
				// TODO(kaib): replace with gins(AMOVW, nodintconst(0), &al)
				// here and below (verify it optimizes to EOR)
				gins(AEOR, &al, &al);
				gins(AEOR, &ah, &ah);
			} else if(v > 32) {
				gins(AEOR, &al, &al);
				//	MOVW	bl<<(v-32), ah
				gshift(AMOVW, &bl, SHIFT_LL, (v-32), &ah);
			} else if(v == 32) {
				gins(AEOR, &al, &al);
				gins(AMOVW, &bl, &ah);
			} else if(v > 0) {
				//	MOVW	bl<<v, al
				gshift(AMOVW, &bl, SHIFT_LL, v, &al);

				//	MOVW	bh<<v, ah
				gshift(AMOVW, &bh, SHIFT_LL, v, &ah);

				//	OR	bl>>(32-v), ah
				gshift(AORR, &bl, SHIFT_LR, 32-v, &ah);
			} else {
				gins(AMOVW, &bl, &al);
				gins(AMOVW, &bh, &ah);
			}
			goto olsh_break;
		}

		// variable shift count: s holds the count, creg a scratch constant.
		regalloc(&s, types[TUINT32], N);
		regalloc(&creg, types[TUINT32], N);
		if (is64(r->type)) {
			// shift is >= 1<<32
			split64(r, &cl, &ch);
			gmove(&ch, &s);
			p1 = gins(AMOVW, &s, &s);
			p1->scond |= C_SBIT;
			p6 = gbranch(ABNE, T);
			gmove(&cl, &s);
			splitclean();
		} else {
			gmove(r, &s);
			p6 = P;
		}
		p1 = gins(AMOVW, &s, &s);
		p1->scond |= C_SBIT;

		// shift == 0
		p1 = gins(AMOVW, &bl, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &ah);
		p1->scond = C_SCOND_EQ;
		p2 = gbranch(ABEQ, T);

		// shift is < 32
		nodconst(&n1, types[TUINT32], 32);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		bl<<s, al
		p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &al);
		p1->scond = C_SCOND_LO;

		//	MOVW.LO		bh<<s, ah
		p1 = gregshift(AMOVW, &bh, SHIFT_LL, &s, &ah);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		s, creg
		p1 = gins(ASUB, &s, &creg);
		p1->scond = C_SCOND_LO;

		//	OR.LO		bl>>creg, ah
		p1 = gregshift(AORR, &bl, SHIFT_LR, &creg, &ah);
		p1->scond = C_SCOND_LO;

		//	BLO	end
		p3 = gbranch(ABLO, T);

		// shift == 32
		p1 = gins(AEOR, &al, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bl, &ah);
		p1->scond = C_SCOND_EQ;
		p4 = gbranch(ABEQ, T);

		// shift is < 64
		nodconst(&n1, types[TUINT32], 64);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	EOR.LO	al, al
		p1 = gins(AEOR, &al, &al);
		p1->scond = C_SCOND_LO;

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		creg, s
		p1 = gins(ASUB, &creg, &s);
		p1->scond = C_SCOND_LO;

		//	MOVW.LO	bl<<s, ah
		p1 = gregshift(AMOVW, &bl, SHIFT_LL, &s, &ah);
		p1->scond = C_SCOND_LO;

		p5 = gbranch(ABLO, T);

		// shift >= 64
		if (p6 != P) patch(p6, pc);
		gins(AEOR, &al, &al);
		gins(AEOR, &ah, &ah);

		patch(p2, pc);
		patch(p3, pc);
		patch(p4, pc);
		patch(p5, pc);
		regfree(&s);
		regfree(&creg);

olsh_break:
		regfree(&bl);
		regfree(&bh);
		break;

	case ORSH:
		// bh's type selects arithmetic (TINT32) or logical shifts below.
		regalloc(&bl, lo1.type, N);
		regalloc(&bh, hi1.type, N);
		gins(AMOVW, &hi1, &bh);
		gins(AMOVW, &lo1, &bl);

		// constant shift count: pick the instruction sequence now.
		if(r->op == OLITERAL) {
			v = mpgetfix(r->val.u.xval);
			if(v >= 64) {
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->31, al
					gshift(AMOVW, &bh, SHIFT_AR, 31, &al);

					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					gins(AEOR, &al, &al);
					gins(AEOR, &ah, &ah);
				}
			} else if(v > 32) {
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->(v-32), al
					gshift(AMOVW, &bh, SHIFT_AR, v-32, &al);

					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					//	MOVW	bh>>(v-32), al
					gshift(AMOVW, &bh, SHIFT_LR, v-32, &al);
					gins(AEOR, &ah, &ah);
				}
			} else if(v == 32) {
				gins(AMOVW, &bh, &al);
				if(bh.type->etype == TINT32) {
					//	MOVW	bh->31, ah
					gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
				} else {
					gins(AEOR, &ah, &ah);
				}
			} else if( v > 0) {
				//	MOVW	bl>>v, al
				gshift(AMOVW, &bl, SHIFT_LR, v, &al);

				//	OR	bh<<(32-v), al
				gshift(AORR, &bh, SHIFT_LL, 32-v, &al);

				if(bh.type->etype == TINT32) {
					//	MOVW	bh->v, ah
					gshift(AMOVW, &bh, SHIFT_AR, v, &ah);
				} else {
					//	MOVW	bh>>v, ah
					gshift(AMOVW, &bh, SHIFT_LR, v, &ah);
				}
			} else {
				gins(AMOVW, &bl, &al);
				gins(AMOVW, &bh, &ah);
			}
			goto orsh_break;
		}

		// variable shift count: s holds the count, creg a scratch constant.
		regalloc(&s, types[TUINT32], N);
		regalloc(&creg, types[TUINT32], N);
		if (is64(r->type)) {
			// shift is >= 1<<32
			split64(r, &cl, &ch);
			gmove(&ch, &s);
			p1 = gins(AMOVW, &s, &s);
			p1->scond |= C_SBIT;
			p6 = gbranch(ABNE, T);
			gmove(&cl, &s);
			splitclean();
		} else {
			gmove(r, &s);
			p6 = P;
		}
		p1 = gins(AMOVW, &s, &s);
		p1->scond |= C_SBIT;

		// shift == 0
		p1 = gins(AMOVW, &bl, &al);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &ah);
		p1->scond = C_SCOND_EQ;
		p2 = gbranch(ABEQ, T);

		// check if shift is < 32
		nodconst(&n1, types[TUINT32], 32);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		bl>>s, al
		p1 = gregshift(AMOVW, &bl, SHIFT_LR, &s, &al);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		s,creg
		p1 = gins(ASUB, &s, &creg);
		p1->scond = C_SCOND_LO;

		//	OR.LO		bh<<(32-s), al
		p1 = gregshift(AORR, &bh, SHIFT_LL, &creg, &al);
		p1->scond = C_SCOND_LO;

		if(bh.type->etype == TINT32) {
			//	MOVW	bh->s, ah
			p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &ah);
		} else {
			//	MOVW	bh>>s, ah
			p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &ah);
		}
		p1->scond = C_SCOND_LO;

		//	BLO	end
		p3 = gbranch(ABLO, T);

		// shift == 32: al = bh, ah = sign fill (signed) or 0 (unsigned).
		// the unsigned arm must clear ah, not al: al is overwritten by
		// the MOVW just below and ah has no other writer on this path.
		if(bh.type->etype == TINT32)
			p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
		else
			p1 = gins(AEOR, &ah, &ah);
		p1->scond = C_SCOND_EQ;
		p1 = gins(AMOVW, &bh, &al);
		p1->scond = C_SCOND_EQ;
		p4 = gbranch(ABEQ, T);

		// check if shift is < 64
		nodconst(&n1, types[TUINT32], 64);
		gmove(&n1, &creg);
		gcmp(ACMP, &s, &creg);

		//	MOVW.LO		creg>>1, creg
		p1 = gshift(AMOVW, &creg, SHIFT_LR, 1, &creg);
		p1->scond = C_SCOND_LO;

		//	SUB.LO		creg, s
		p1 = gins(ASUB, &creg, &s);
		p1->scond = C_SCOND_LO;

		if(bh.type->etype == TINT32) {
			//	MOVW	bh->(s-32), al
			p1 = gregshift(AMOVW, &bh, SHIFT_AR, &s, &al);
			p1->scond = C_SCOND_LO;

			//	MOVW	bh->31, ah
			p1 = gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
			p1->scond = C_SCOND_LO;
		} else {
			//	MOVW	bh>>(s-32), al
			p1 = gregshift(AMOVW, &bh, SHIFT_LR, &s, &al);
			p1->scond = C_SCOND_LO;

			p1 = gins(AEOR, &ah, &ah);
			p1->scond = C_SCOND_LO;
		}

		//	BLO	end
		p5 = gbranch(ABLO, T);

		// s >= 64
		if (p6 != P) patch(p6, pc);
		if(bh.type->etype == TINT32) {
			//	MOVW	bh->31, al
			gshift(AMOVW, &bh, SHIFT_AR, 31, &al);

			//	MOVW	bh->31, ah
			gshift(AMOVW, &bh, SHIFT_AR, 31, &ah);
		} else {
			gins(AEOR, &al, &al);
			gins(AEOR, &ah, &ah);
		}

		patch(p2, pc);
		patch(p3, pc);
		patch(p4, pc);
		patch(p5, pc);
		regfree(&s);
		regfree(&creg);

orsh_break:
		regfree(&bl);
		regfree(&bh);
		break;

	case OXOR:
	case OAND:
	case OOR:
		// TODO(kaib): literal optimizations.
		// idea: make the constant the right side (it usually is
		// anyway), then special-case each 32-bit half of it:
		// 0 and 0xffffffff make the XOR/AND/OR of that half either
		// a plain move, a complement, or a constant store.
		regalloc(&n1, lo1.type, N);
		gins(AMOVW, &lo1, &al);
		gins(AMOVW, &hi1, &ah);
		gins(AMOVW, &lo2, &n1);
		gins(optoas(n->op, lo1.type), &n1, &al);
		gins(AMOVW, &hi2, &n1);
		gins(optoas(n->op, lo1.type), &n1, &ah);
		regfree(&n1);
		break;
	}
	// undo the split of r (when it was split) and of l.
	if(is64(r->type))
		splitclean();
	splitclean();

	// store ah:al into res.
	split64(res, &lo1, &hi1);
	gins(AMOVW, &al, &lo1);
	gins(AMOVW, &ah, &hi1);
	splitclean();

//out:
	regfree(&al);
	regfree(&ah);
}
/* Low-resolution character plot of an expression on an interval.
 * a, b   : endpoints; swapped when b < a, and nothing is drawn when a = b
 * code   : expression evaluated through READ_EXPR(code,x) at ISCR abscissas
 *          x = a, a+dx, ..., with dx = (b-a)/(ISCR-1)
 * ysmlu  : when non-NULL, replaces the smallest sampled value as lower
 *          y-bound
 * ybigu  : when non-NULL, replaces the largest sampled value as upper
 *          y-bound
 * prec   : precision handed to gtofp for the abscissas
 * Output goes through pari_putc / pari_puts / pari_printf. */
void
pariplot(GEN a, GEN b, GEN code, GEN ysmlu,GEN ybigu, long prec)
{
  const char BLANK = ' ', YY = '|', XX_UPPER = '\'', XX_LOWER = '.';
  long jz, j, i, sig;
  pari_sp av = avma;
  int jnew, jpre = 0; /* for lint */
  GEN x, dx;
  double diff, dyj, ysml, ybig, y[ISCR+1];
  screen scr;
  char buf[80], z;

  sig=gcmp(b,a); if (!sig) return; /* empty interval: nothing to plot */
  if (sig<0) { x=a; a=b; b=x; }
  x = gtofp(a, prec); push_lex(x, code);
  dx = divru(gtofp(gsub(b,a),prec), ISCR-1);

  /* frame: '|' in the first and last columns, '.' along row 1 and '\''
   * along row JSCR, blanks everywhere else */
  for (j=1; j<=JSCR; j++) scr[1][j]=scr[ISCR][j]=YY;
  for (i=2; i<ISCR; i++)
  {
    scr[i][1] = XX_LOWER;
    scr[i][JSCR]= XX_UPPER;
    for (j=2; j<JSCR; j++) scr[i][j] = BLANK;
  }

  /* sample the expression, tracking the smallest and largest value */
  ysml = ybig = 0.; /* -Wall */
  for (i=1; i<=ISCR; i++)
  {
    pari_sp av2 = avma;
    y[i] = gtodouble( READ_EXPR(code,x) );
    avma = av2; /* only the double is kept from the evaluation */
    if (i == 1)
      ysml = ybig = y[1];
    else
    {
      if (y[i] < ysml) ysml = y[i];
      if (y[i] > ybig) ybig = y[i];
    }
    x = addrr(x,dx);
  }
  avma = av;
  if (ysmlu) ysml = gtodouble(ysmlu);
  if (ybigu) ybig = gtodouble(ybigu);
  diff = ybig - ysml;
  if (!diff) { ybig += 1; diff= 1.; } /* flat data: avoid dividing by 0 */
  dyj = ((JSCR-1)*3+2) / diff; /* three sub-positions per character row */
  /* work around bug in gcc-4.8 (32bit): plot(x=-5,5,sin(x)))) */
  jz = 3 - (long)(ysml*dyj + 0.5); /* 3 - DTOL(ysml*dyj) */
  z = PICTZERO(jz); jz /= 3; /* row and glyph of the y = 0 axis */
  for (i=1; i<=ISCR; i++)
  {
    if (0<=jz && jz<=JSCR) scr[i][jz]=z;
    j = 3 + DTOL((y[i]-ysml)*dyj);
    jnew = j/3;
    if (i > 1) fill_gap(scr, i, jnew, jpre);
    if (0<=jnew && jnew<=JSCR) scr[i][jnew] = PICT(j);
    jpre = jnew;
  }

  /* print from the top row down; the top and bottom rows carry the
   * y-bounds */
  pari_putc('\n');
  pari_printf("%s ", dsprintf9(ybig, buf));
  for (i=1; i<=ISCR; i++) pari_putc(scr[i][JSCR]);
  pari_putc('\n');
  for (j=(JSCR-1); j>=2; j--)
  {
    /* NOTE(review): interior rows are prefixed by this literal while the
     * top and bottom rows use dsprintf9(...) followed by a space; confirm
     * that the two prefixes have the same width */
    pari_puts(" ");
    for (i=1; i<=ISCR; i++) pari_putc(scr[i][j]);
    pari_putc('\n');
  }
  pari_printf("%s ", dsprintf9(ysml, buf));
  for (i=1; i<=ISCR; i++) pari_putc(scr[i][1]);
  pari_putc('\n');
  {
    char line[10 + 32 + 32 + ISCR - 9];
    sprintf(line, "%10s%-9.7g%*.7g\n"," ",todbl(a),ISCR-9,todbl(b));
    /* line is already fully formatted: emit it verbatim instead of
     * handing it to pari_printf as a format string */
    pari_puts(line);
  }
  pop_lex(1);
}