/*
 * Blue channel value for pixel (i, j).
 *
 * Builds a scale factor that shrinks as j grows, a sine-perturbed row
 * coordinate, and two stripe coordinates (one increasing with i, one
 * decreasing with i), each multiplied by 29. The parity of each truncated
 * stripe coordinate is summed and multiplied by 127, so non-negative
 * coordinates yield 0, 127 or 254.
 *
 * DIM, P and _sq() are not defined in this function; they must already be
 * visible at this point in the translation unit.
 */
unsigned char BL(int i,int j)
{
    const float scale = 3. / (j + 250);
    const float row = (j + sin((i * i + _sq(j - 700) * 5) / 100. / DIM + P) * 15) * scale;

    /* Stripe parity along the two opposing diagonal directions. */
    const int rising = (int)(29 * ((i + DIM) * scale + row)) % 2;
    const int falling = (int)(29 * ((DIM * 2 - i) * scale + row)) % 2;

    return (rising + falling) * 127;
}
/*
 * Phase offset added inside the sine argument below.
 * It is defined on its own line because '#' only introduces a preprocessing
 * directive when it is the first token on a line; placed after other tokens
 * it is a syntax error.
 */
#define P 6.43

/*
 * Red channel value for pixel (i, j).
 *
 * s is a scale factor that shrinks as j grows; y is the sine-perturbed row
 * coordinate scaled by s. Two stripe coordinates (one increasing with i, one
 * decreasing with i) are truncated to int, their parities are summed, and
 * the sum is multiplied by 127, so non-negative coordinates yield 0, 127 or
 * 254.
 *
 * DIM and _sq() are not defined here; they must already be visible at this
 * point in the translation unit.
 */
unsigned char RD(int i,int j)
{
    float s = 3. / (j + 250);
    float y = (j + sin((i * i + _sq(j - 700) * 5) / 100. / DIM + P) * 15) * s;

    return ((int)((i + DIM) * s + y) % 2 + (int)((DIM * 2 - i) * s + y) % 2) * 127;
}
/*
 * Green channel value for pixel (i, j).
 *
 * Builds a scale factor that shrinks as j grows, a sine-perturbed row
 * coordinate (amplitude 35, no phase offset), and two stripe coordinates
 * (one increasing with i, one decreasing with i), each multiplied by 5.
 * The parity of each truncated stripe coordinate is summed and multiplied
 * by 127, so non-negative coordinates yield 0, 127 or 254.
 *
 * DIM and _sq() are not defined in this function; they must already be
 * visible at this point in the translation unit.
 */
unsigned char GR(int i,int j)
{
    const float scale = 3. / (j + 99);
    const float row = (j + sin((i * i + _sq(j - 700) * 5) / 100. / DIM) * 35) * scale;

    /* Stripe parity along the two opposing diagonal directions. */
    const int rising = (int)(5 * ((i + DIM) * scale + row)) % 2;
    const int falling = (int)(5 * ((DIM * 2 - i) * scale + row)) % 2;

    return (rising + falling) * 127;
}
/*
 * Blue channel value for pixel (i, j).
 *
 * Takes the angle of the pixel around (512, 512), halves it, adds a phase of
 * 2*pi/3 (acos(-1) is pi), and returns the squared cosine scaled to 0..255.
 *
 * _sq() is not defined here; it must already be visible at this point in the
 * translation unit.
 */
unsigned char BL(int i, int j)
{
    /*
     * Evaluate the trig expression once and hand _sq() a plain variable, so
     * a macro-style _sq() does not repeat the atan2/cos/acos calls.
     */
    const double c = cos(atan2(j - 512, i - 512) / 2 + 2 * acos(-1) / 3);

    /*
     * The scaled value lies in [0, 255]. Converting it to plain char is
     * undefined where char is signed and the value exceeds CHAR_MAX, so
     * convert straight to the unsigned return type instead.
     */
    return (unsigned char)(_sq(c) * 255);
}
/*
 * Red channel value for pixel (i, j).
 *
 * Takes the angle of the pixel around (512, 512), halves it, and returns the
 * squared cosine scaled to 0..255.
 *
 * _sq() is not defined here; it must already be visible at this point in the
 * translation unit.
 */
unsigned char RD(int i, int j)
{
    /*
     * Evaluate the trig expression once and hand _sq() a plain variable, so
     * a macro-style _sq() does not repeat the atan2/cos calls.
     */
    const double c = cos(atan2(j - 512, i - 512) / 2);

    /*
     * The scaled value lies in [0, 255]. Converting it to plain char is
     * undefined where char is signed and the value exceeds CHAR_MAX, so
     * convert straight to the unsigned return type instead.
     */
    return (unsigned char)(_sq(c) * 255);
}
/*
 * out = (1 / in) % m, computed as one fixed exponentiation using Fermat's
 * Little Theorem. Cost: 44 multiplications and 262 squarings.
 *
 * out: receives the result; written once, at the end, via fe_copy.
 * in:  value to invert; only read.
 *
 * NOTE(review): fe, _sq, _mul, fe_0, fe_copy, cleanup and the table m2 are
 * defined outside this view. The exponent comments below assume that
 * _sq(r, a) stores a^2 and _mul(r, a, b) stores a*b, both reduced mod m --
 * confirm against their definitions.
 */
static void _inv(fe out, const fe in)
{
    fe o, x2, x4, x8, x16, x32, t[16];
    long long i;

    /*
     * Build xN = in^(2^N - 1) by doubling N: square x(N/2) N/2 times, then
     * multiply by x(N/2). Each stage does one squaring up front and the
     * remaining (N/2 - 1) in the loop.
     */

    /* x2 = in^3. The loop bound 1 - 1 is zero, so this loop never runs. */
    _sq(o, in);
    for (i = 0; i < 1 - 1; ++i) _sq(o, o);
    _mul(x2, o, in);

    /* x4 = in^(2^4 - 1) */
    _sq(o, x2);
    for (i = 0; i < 2 - 1; ++i) _sq(o, o);
    _mul(x4, o, x2);

    /* x8 = in^(2^8 - 1) */
    _sq(o, x4);
    for (i = 0; i < 4 - 1; ++i) _sq(o, o);
    _mul(x8, o, x4);

    /* x16 = in^(2^16 - 1) */
    _sq(o, x8);
    for (i = 0; i < 8 - 1; ++i) _sq(o, o);
    _mul(x16, o, x8);

    /* x32 = in^(2^32 - 1) */
    _sq(o, x16);
    for (i = 0; i < 16 - 1; ++i) _sq(o, o);
    _mul(x32, o, x16);

    /*
     * Square x32 a total of 64 times (1 + 31 + 32), multiply by x32, square
     * 32 more times, multiply by x32 again. Under the assumption above this
     * leaves o = in^(2^128 - 2^96 + 2^64 - 1).
     */
    _sq(o, x32);
    for (i = 0; i < 32 - 1; ++i) _sq(o, o);
    for (i = 0; i < 32; ++i) _sq(o, o);
    _mul(o, o, x32);
    for (i = 0; i < 32; ++i) _sq(o, o);
    _mul(o, o, x32);

    /*
     * Table of small powers for the 4-bit windowed tail: t[k] = in^k for
     * k = 1..15. Even entries are the square of t[k / 2], odd entries are
     * t[k - 1] times in; t[3] and t[15] reuse x2 and x4.
     *
     * NOTE(review): t[0] is whatever fe_0 produces (presumably zero, not
     * one), so an m2 entry of 0 would multiply the accumulator by that
     * value -- confirm m2 contains no zero entries.
     */
    fe_0(t[0]);
    fe_copy(t[1], in);
    _sq(t[2], t[1]);
    fe_copy(t[3], x2);
    for (i = 4; i < 15; ++i) {
        if ((i & 1) == 0) _sq(t[i], t[i / 2]);
        else _mul(t[i], t[i - 1], in);
    }
    fe_copy(t[15], x4);

    /*
     * 32 windows of 4 bits: four squarings, then multiply by the table
     * entry selected by m2[i]. Presumably m2 holds the remaining 128
     * exponent bits as 4-bit digits, most significant first -- verify
     * against m2's definition.
     */
    for (i = 0; i < 32; ++i) {
        _sq(o, o);
        _sq(o, o);
        _sq(o, o);
        _sq(o, o);
        _mul(o, o, t[m2[i]]);
    }

    fe_copy(out, o);

    /* Presumably wipes the temporaries; cleanup is defined elsewhere. */
    cleanup(o);
    cleanup(t);
    cleanup(x2);
    cleanup(x4);
    cleanup(x8);
    cleanup(x16);
    cleanup(x32);
}