int WolfeLineSearch(FunctorType &func, Scalar &alpha, XType &x1, Scalar &f1, XType &gradx1, const XType &p, const XType &x0, const Scalar &f0, const XType &gradx0, const Scalar &c1, const Scalar &c2, const Scalar &minAlpha, const Scalar &maxLSIts, const Scalar &maxLSRestarts) { const Scalar dfp(gradx0.dot(p)); const Scalar c1dfp(c1*dfp); const Scalar c2dfp(c2*dfp); Scalar alpha0(minAlpha); Scalar alpha1(alpha); Scalar prevF(f0); XType prevDF(gradx0); Scalar prevDFp(dfp); Scalar newDFp; int retCode = 0, nits = 0, lsRestarts = 0, ret; while (1) { if (nits >= maxLSIts) { retCode = 1; break; } x1.noalias() = x0 + alpha1 * p; ret = func(x1, f1, gradx1); if (ret != 0) { if (lsRestarts >= maxLSRestarts) { retCode = 1; break; } alpha1 = 0.5 * (alpha0 + alpha1); lsRestarts++; continue; } lsRestarts = 0; newDFp = gradx1.dot(p); if ((f1 > f0 + alpha * c1dfp) || (f1 >= prevF && nits > 0)) { retCode = WolfLSZoom(alpha, x1, f1, gradx1, func, x0, f0, dfp, c1dfp, c2dfp, p, alpha0, prevF, prevDFp, alpha1, f1, newDFp, 1e-16); break; } if (std::fabs(newDFp) <= -c2dfp) { alpha = alpha1; break; } if (newDFp >= 0) { retCode = WolfLSZoom(alpha, x1, f1, gradx1, func, x0, f0, dfp, c1dfp, c2dfp, p, alpha1, f1, newDFp, alpha0, prevF, prevDFp, 1e-16); break; } alpha0 = alpha1; prevF = f1; std::swap(prevDF, gradx1); prevDFp = newDFp; alpha1 *= 10.0; nits++; } return retCode; }
void GSRendererHW::RoundSpriteOffset() { //#define DEBUG_U //#define DEBUG_V #if defined(DEBUG_V) || defined(DEBUG_U) bool debug = linear; #endif size_t count = m_vertex.next; GSVertex* v = &m_vertex.buff[0]; for(size_t i = 0; i < count; i += 2) { // Performance note: if it had any impact on perf, someone would port it to SSE (AKA GSVector) // Compute the coordinate of first and last texels (in native with a linear filtering) int ox = m_context->XYOFFSET.OFX; int X0 = v[i].XYZ.X - ox; int X1 = v[i+1].XYZ.X - ox; int Lx = (v[i+1].XYZ.X - v[i].XYZ.X); float ax0 = alpha0(Lx, X0, X1); float ax1 = alpha1(Lx, X0, X1); int tx0 = Interpolate_UV(ax0, v[i].U, v[i+1].U); int tx1 = Interpolate_UV(ax1, v[i].U, v[i+1].U); #ifdef DEBUG_U if (debug) { fprintf(stderr, "u0:%d and u1:%d\n", v[i].U, v[i+1].U); fprintf(stderr, "a0:%f and a1:%f\n", ax0, ax1); fprintf(stderr, "t0:%d and t1:%d\n", tx0, tx1); } #endif int oy = m_context->XYOFFSET.OFY; int Y0 = v[i].XYZ.Y - oy; int Y1 = v[i+1].XYZ.Y - oy; int Ly = (v[i+1].XYZ.Y - v[i].XYZ.Y); float ay0 = alpha0(Ly, Y0, Y1); float ay1 = alpha1(Ly, Y0, Y1); int ty0 = Interpolate_UV(ay0, v[i].V, v[i+1].V); int ty1 = Interpolate_UV(ay1, v[i].V, v[i+1].V); #ifdef DEBUG_V if (debug) { fprintf(stderr, "v0:%d and v1:%d\n", v[i].V, v[i+1].V); fprintf(stderr, "a0:%f and a1:%f\n", ay0, ay1); fprintf(stderr, "t0:%d and t1:%d\n", ty0, ty1); } #endif #ifdef DEBUG_U if (debug) fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].U, v[i+1].U); #endif #ifdef DEBUG_V if (debug) fprintf(stderr, "GREP_BEFORE %d => %d\n", v[i].V, v[i+1].V); #endif #if 1 // Use rounded value of the newly computed texture coordinate. It ensures // that sampling will remains inside texture boundary // // Note for bilinear: by definition it will never work correctly! A sligh modification // of interpolation migth trigger a discard (with alpha testing) // Let's use something simple that correct really bad case (for a couple of 2D games). // I hope it won't create too much glitches. if (linear) { int Lu = v[i+1].U - v[i].U; // Note 32 is based on taisho-mononoke if ((Lu > 0) && (Lu <= (Lx+32))) { v[i+1].U -= 8; } } else { if (tx0 <= tx1) { v[i].U = tx0; v[i+1].U = tx1 + 16; } else { v[i].U = tx0 + 15; v[i+1].U = tx1; } } #endif #if 1 if (linear) { int Lv = v[i+1].V - v[i].V; if ((Lv > 0) && (Lv <= (Ly+32))) { v[i+1].V -= 8; } } else { if (ty0 <= ty1) { v[i].V = ty0; v[i+1].V = ty1 + 16; } else { v[i].V = ty0 + 15; v[i+1].V = ty1; } } #endif #ifdef DEBUG_U if (debug) fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].U, v[i+1].U); #endif #ifdef DEBUG_V if (debug) fprintf(stderr, "GREP_AFTER %d => %d\n\n", v[i].V, v[i+1].V); #endif } }