// Reference: http://nehe.gamedev.net/data/lessons/lesson.asp?lesson=30 GLint testIntPlane(const Plane &pl, const Vec &pos, const Vec &dir, GLdouble &lamda, const GLdouble offset){ if ( (fabs(pl.position.getX() - pos.getX()) > offset) || (fabs(pl.position.getZ() - pos.getZ()) > offset)) { return 0; } /* Dot Product between the plane normal and ray direction */ GLdouble theDotProd = dotProd(dir, pl.normal); GLdouble lam; // Determine If Ray Parallel To Plane if ((theDotProd < ZERO) && (theDotProd > -ZERO)) return 0; /* Find the distance to the collision point */ lam = dotProd(pl.position-pos, pl.normal); lam /= theDotProd; if (lam < -ZERO) // Test If Collision Behind Start return 0; lamda=lam; return 1; }
double ModelMFWt::objective(const Data& data, std::unordered_set<int>& invalidUsers, std::unordered_set<int>& invalidItems) { int u, ii, item; float itemRat; double rmse = 0, uRegErr = 0, iRegErr = 0, obj = 0, diff = 0; gk_csr_t *trainMat = data.trainMat; std::unordered_set<int> headItems = getHeadItems(trainMat, 0.5); std::unordered_set<int> headUsers = getHeadUsers(trainMat, 0.5); double lambda0 = 0.8; double lambda1 = 1.0 - lambda0; for (u = 0; u < nUsers; u++) { //skip if invalid user auto search = invalidUsers.find(u); if (search != invalidUsers.end()) { //found and skip continue; } for (ii = trainMat->rowptr[u]; ii < trainMat->rowptr[u+1]; ii++) { item = trainMat->rowind[ii]; //skip if invalid item search = invalidItems.find(item); if (search != invalidItems.end()) { //found and skip continue; } itemRat = trainMat->rowval[ii]; diff = itemRat - estRating(u, item); if (headItems.find(item) != headItems.end() && headUsers.find(u) != headUsers.end()) { rmse += diff*diff*lambda0; } else { rmse += diff*diff*(lambda0+lambda1); } } uRegErr += dotProd(uFac[u], uFac[u], facDim); } uRegErr = uRegErr*uReg; for (item = 0; item < nItems; item++) { //skip if invalid item auto search = invalidItems.find(item); if (search != invalidItems.end()) { //found and skip continue; } iRegErr += dotProd(iFac[item], iFac[item], facDim); } iRegErr = iRegErr*iReg; obj = rmse + uRegErr + iRegErr; //std::cout <<"\nrmse: " << std::scientific << rmse << " uReg: " << uRegErr << " iReg: " << iRegErr ; return obj; }
/*
 * Removes from v its component along u (one Gram-Schmidt step), in place:
 *
 *     v    <- v    - (<u,v> / <u,u>) * u
 *
 * and applies the same linear update to the companion vectors:
 *
 *     v_id <- v_id - (<u,v> / <u,u>) * u_id
 *
 * u, v, u_id and v_id must each hold at least dim entries.
 *
 * When <u,u> is exactly zero there is no direction to project out, so v and
 * v_id are left untouched instead of being filled with NaN/inf by a
 * division by zero.
 */
void proj(double *u, double *v, double *u_id, double *v_id, int dim)
{
    double u_v = dotProd(u, v, dim);
    double u_u = dotProd(u, u, dim);
    int i;

    if (u_u == 0.0)
        return;

    for (i = 0; i < dim; i++) {
        v[i]    -= (u[i] * u_v) / u_u;
        v_id[i] -= (u_id[i] * u_v) / u_u;
    }
}
void CSysSolve::modGramSchmidt(int i, vector<vector<double> > & Hsbg, vector<CSysVector> & w) { /*--- Parameter for reorthonormalization ---*/ static const double reorth = 0.98; /*--- get the norm of the vector being orthogonalized, and find the threshold for re-orthogonalization ---*/ double nrm = dotProd(w[i+1],w[i+1]); double thr = nrm*reorth; if (nrm <= 0.0) { /*--- The norm of w[i+1] < 0.0 ---*/ cerr << "CSysSolve::modGramSchmidt: dotProd(w[i+1],w[i+1]) < 0.0" << endl; throw(-1); } else if (nrm != nrm) { /*--- This is intended to catch if nrm = NaN, but some optimizations may mess it up (according to posts on stackoverflow.com) ---*/ cerr << "CSysSolve::modGramSchmidt: w[i+1] = NaN" << endl; throw(-1); } /*--- Begin main Gram-Schmidt loop ---*/ for (int k = 0; k < i+1; k++) { double prod = dotProd(w[i+1],w[k]); Hsbg[k][i] = prod; w[i+1].Plus_AX(-prod, w[k]); /*--- Check if reorthogonalization is necessary ---*/ if (prod*prod > thr) { prod = dotProd(w[i+1],w[k]); Hsbg[k][i] += prod; w[i+1].Plus_AX(-prod, w[k]); } /*--- Update the norm and check its size ---*/ nrm -= Hsbg[k][i]*Hsbg[k][i]; if (nrm < 0.0) nrm = 0.0; thr = nrm*reorth; } /*--- Test the resulting vector ---*/ nrm = w[i+1].norm(); Hsbg[i+1][i] = nrm; if (nrm <= 0.0) { /*--- w[i+1] is a linear combination of the w[0:i] ---*/ cerr << "CSysSolve::modGramSchmidt: w[i+1] linearly dependent on w[0:i]" << endl; throw(-1); } /*--- Scale the resulting vector ---*/ w[i+1] /= nrm; }
/** * @param x coordinates of point to be tested * @param t coordinates of apex point of cone * @param b coordinates of center of basement circle * @param aperture in radians Code copied from http://stackoverflow.com/questions/10768142/verify-if-point-is-inside-a-cone-in-3d-space credit to: furikuretsu altered to suit this purpose */ static bool isLyingInCone(float x[], float t[], float b[], float radius, float height) { float aperture = 2.f * atan(radius/height); // This is for our convenience float halfAperture = aperture/2.f; // Vector pointing to X point from apex float apexToXVect[] = {t[0]-x[0],t[1]-x[1],t[2]-x[2]}; // Vector pointing from apex to circle-center point. float axisVect[] = {t[0]-b[0],t[1]-b[1],t[2]-b[2]}; // X is lying in cone only if it's lying in // infinite version of its cone -- that is, // not limited by "round basement". // We'll use dotProd() to // determine angle between apexToXVect and axis. bool isInInfiniteCone = dotProd(apexToXVect,axisVect)/magn(apexToXVect)/magn(axisVect) > cos(halfAperture); // We can safely compare cos() of angles // between vectors instead of bare angles. return isInInfiniteCone; }
/* ------------------------------------- * Logistic Activation * ------------------------------------- * Theta = array of K x N. K = Númber of * classes. N = Dimension of each * observation. */ double logSumExp(double* theta, int i, int length){ if(!stocMode){ if(-log(1 + exp(pow(-1, logistic_labels[i])* dotProd(logistic_values[i], theta, length))) < -1e30){ return -1e10; // Numerical stability... } return -log(1 + exp(pow(-1, logistic_labels[i])* dotProd(logistic_values[i], theta, length))); } if(-log(1 + exp(pow(-1, sample_logistic_labels[i])* dotProd(sample_logistic_values[i], theta, length))) < -1e30){ return -1e10; // Numerical stability... } return -log(1 + exp(pow(-1, sample_logistic_labels[i])* dotProd(sample_logistic_values[i], theta, length))); }
/*
 * Dot product of two `vector` values.
 *
 * Each vector is first converted with vectorToArray(); the two arrays are
 * passed to dotProd() with LENGTH elements and then released with free().
 */
int32_t dotProdVec(vector *a, vector *b){
    int32_t *lhs = vectorToArray(a);
    int32_t *rhs = vectorToArray(b);

    int32_t product = dotProd(lhs, rhs, LENGTH);

    free(lhs);
    free(rhs);

    return product;
}
/*!
 * Returns the square root of dotProd(*this, *this).
 * Throws the integer -1 (after writing to cerr) when that product is negative.
 */
double CSysVector::norm() const {

  const double selfDot = dotProd(*this,*this);

  /*--- Anything that is not strictly negative goes straight to sqrt ---*/
  if (!(selfDot < 0.0)) return sqrt(selfDot);

  cerr << "CSysVector::norm(): " << "inner product of CSysVector is negative";
  throw(-1);
}
// Evaluates cost = 2 b.x - x.(A x), where A x is built from the dense
// denseSize x denseSize block *denseQ (row-major) plus, when sparseQ is
// set, the product computed by sparseQ->rightMultiply.
double GradientProjection::computeCost(
        valarray<double> const &b,
        valarray<double> const &x) const {
    const double linearTerm = 2. * dotProd(b,x);

    valarray<double> Ax(x.size());
    for (unsigned row = 0; row < denseSize; ++row) {
        Ax[row] = 0;
        for (unsigned col = 0; col < denseSize; ++col) {
            Ax[row] += (*denseQ)[row*denseSize+col]*x[col];
        }
    }

    if (sparseQ) {
        valarray<double> sparsePart(x.size());
        sparseQ->rightMultiply(x,sparsePart);
        Ax += sparsePart;
    }

    return linearTerm - dotProd(x,Ax);
}
void SteerLib::GJK_EPA::getClosestEdge(std::vector<Util::Vector> simplex, float& distance, Util::Vector& normal, int& index) { for(int i = 0; i < simplex.size(); i++){ int j = (i + 1 == simplex.size()) ? 0 : i + 1; Util::Vector a = simplex[i]; Util::Vector b = simplex[j]; Util::Vector e = b - a; Util::Vector n = (a*dotProd(e, e) - e*dotProd(e,a)); n = Util::normalize(n); float d = dotProd(n, a); if(d < distance) { distance = d; normal = n; index = j; } } }
/* ------------------------------------- * Binary Logistic * ------------------------------------- * Theta = array of K x N. K = Númber of * classes. N = Dimension of each * observation. */ double logistic(double* theta, int length){ double loss = 0; int i; if(!stocMode){ SAMPLE = MAX_FILE_ROWS; for(i = 0; i < SAMPLE; i++){ // printf("logistic_label[%d] = %d | logSumExp[%d] = %lf\n", i, logistic_labels[i], i, logSumExp(theta, i, length)); loss = loss + logistic_labels[i]*logSumExp(theta, i, length) + (1 - logistic_labels[i])*logSumExp(theta, i, length) + regularization*dotProd(theta, theta, length); } }else{ for(i = 0; i < SAMPLE; i++){ loss = loss + sample_logistic_labels[i]*logSumExp(theta, i, length) + (1 - sample_logistic_labels[i])*logSumExp(theta, i, length) + regularization*dotProd(theta, theta, length); } } return -loss; }
// Intersects the line through p1 and p2 with the sphere of centre sc and
// radius r by solving a*u^2 + b*u + c = 0 for the line parameter u
// (point = p1 + u*(p2 - p1)).
//
// Returns true and writes the two roots to u1 (the "+" root) and u2 (the
// "-" root). Returns false with u1 = u2 = 0 when |a| < EPSILON6 or the
// discriminant is negative.
bool RaySphere(Real3 p1,Real3 p2,Real3 sc, real r,real& u1,real& u2)
{
    Real3 segment = p2-p1;
    Real3 fromCentre = p1-sc;

    real a = dotProd(segment,segment);
    real b = 2*dotProd(segment,fromCentre);
    real c = dotProd(sc,sc)+dotProd(p1,p1)-2*dotProd(sc,p1)-r*r;

    real discriminant = b * b - 4 * a * c;

    const bool noSolution = fabs(a) < EPSILON6 || discriminant < 0;
    if (noSolution) {
        u1 = u2 = 0;
        return false;
    }

    real root = sqrt(discriminant);
    u1 = (-b + root) / (2 * a);
    u2 = (-b - root) / (2 * a);
    return true;
}
/*
 * Least-squares straight-line fit of theData.y against theData.x over
 * theData.n points.
 *
 * The raw sums are passed through DESCALE before use and the three results
 * are multiplied by SCALE before being stored (presumably a fixed-point
 * representation - confirm against the macro definitions).
 *
 * Returns a LinRegResult with
 *   m - slope
 *   b - y-intercept
 *   r - correlation coefficient
 */
LinRegResult linear_regression(DataSet theData)
{
    LinRegResult fit;
    int count = theData.n;

    /* accumulated sums */
    double sx  = DESCALE(sum(theData.x, count));
    double sxx = DESCALE(dotProd(theData.x, theData.x, count));
    double sy  = DESCALE(sum(theData.y, count));
    double syy = DESCALE(dotProd(theData.y, theData.y, count));
    double sxy = DESCALE(dotProd(theData.x, theData.y, count));

    /* shared denominator of slope and intercept */
    double denom = count * sxx - sqr(sx);

    double slope     = (count * sxy - sx * sy) / denom;
    double intercept = (sy * sxx - (sx * sxy)) / denom;
    double corr      = (sxy - sx * sy / count) /
                       sqrt((sxx - sqr(sx) / count) * (syy - sqr(sy)/ count));

    fit.m = slope * SCALE;
    fit.b = intercept * SCALE;
    fit.r = corr * SCALE;

    return fit;
}
/**
 * Mean and sample variance of the predicted ratings <uFac[u], iFac[item]>
 * over all nUsers x nItems (user, item) pairs.
 *
 * @param uFac    user factors, at least nUsers rows
 * @param iFac    item factors, at least nItems rows
 * @param facDim  number of factor entries passed to dotProd
 * @param nUsers  number of users to include
 * @param nItems  number of items to include
 * @return pair (mean, variance); the variance divides by (pairs - 1)
 *
 * The pair count is formed in double precision: nItems*nUsers evaluated in
 * int overflows once the product exceeds INT_MAX. As before, fewer than two
 * pairs makes the divisions degenerate (NaN/inf).
 */
std::pair<double, double> getMeanVar(std::vector<std::vector<double>> uFac,
    std::vector<std::vector<double>> iFac, int facDim, int nUsers, int nItems) {

  const double nPairs = static_cast<double>(nItems) * static_cast<double>(nUsers);

  double mean = 0, var = 0, diff = 0;

  for (int u = 0; u < nUsers; u++) {
    for (int item = 0; item < nItems; item++) {
      mean += dotProd(uFac[u], iFac[item], facDim);
    }
  }
  mean = mean/nPairs;

  for (int u = 0; u < nUsers; u++) {
    for (int item = 0; item < nItems; item++) {
      diff = dotProd(uFac[u], iFac[item], facDim) - mean;
      var += diff*diff;
    }
  }
  var = var/(nPairs - 1);

  return std::make_pair(mean, var);
}
/* Small manual check of dotProd and crossProd: prints both results. */
int main(void){
    /* Dot product check */
    int32_t lhs[LENGTH] = {1, 3, -5};
    int32_t rhs[LENGTH] = {4, -2, -1};
    printf("%d\n", dotProd(lhs, rhs, LENGTH));

    /* Cross product check */
    vector p = {2, 1, -1};
    vector q = {-3, 4, 1};
    vector cross = crossProd(&p, &q);
    printVector(&cross);

    return 0;
}
/* Function: AreaOfTri
 * Description: Computes the area of the triangle ABC.
 * Input: A, B, C - the three vertices of the triangle
 * Output: Area
 *
 * With the edge vectors s1 = A - B and s2 = C - B,
 *
 *     area = 0.5 * sqrt(|s1|^2 * |s2|^2 - (s1 . s2)^2)
 *
 * The previous formula subtracted the squared, unnormalised dot product
 * from a single squared side length, which is only correct when the two
 * edges are perpendicular and otherwise could take the square root of a
 * negative number.
 *
 * Assumes pDIFFERENCE(a, b, out) stores a - b in out and dotProd is the
 * Euclidean dot product of two points - confirm against their definitions.
 */
double AreaOfTri(point A, point B, point C)
{
	point side1, side2;
	double len1Sq, len2Sq, dot, radicand;

	pDIFFERENCE(A, B, side1);
	pDIFFERENCE(C, B, side2);

	len1Sq = dotProd(side1, side1);
	len2Sq = dotProd(side2, side2);
	dot = dotProd(side1, side2);

	/* Rounding can push a degenerate triangle slightly below zero */
	radicand = len1Sq * len2Sq - dot * dot;
	if (radicand < 0.0)
		radicand = 0.0;

	return sqrt(radicand) / 2.0;
}
/*
 * Populate the correlation matrix.
 *
 * Builds a matrix via init_matrix(W->cols, V->cols) and fills entry [y][x]
 * with the dot product, over V->rows entries, of column y of W (the
 * predictor / class column) and column x of V (the sample column).
 *
 * NOTE(review): the scratch arrays hold V->rows entries but are filled for
 * W->rows rows; this looks like it assumes V->rows == W->rows - confirm
 * with the callers.
 */
matrix * populateCorrMatrix(matrix *V, matrix *W){
	matrix * correlations = init_matrix(W->cols, V->cols);
	double predictor[V->rows];
	double observed[V->rows];
	int sample, cls, row;

	for(sample = 0; sample < correlations->cols; sample++){   /* each sample */
		for(cls = 0; cls < correlations->rows; cls++){        /* each class */
			/* gather the two columns into contiguous scratch arrays */
			for(row = 0; row < W->rows; row++){
				predictor[row] = W->graph[row][cls];
				observed[row] = V->graph[row][sample];
			}
			correlations->graph[cls][sample] = dotProd(predictor, observed, V->rows);
		}
	}
	return correlations;
}
bool SteerLib::GJK_EPA::gjk(const std::vector<Util::Vector>& _shapeA, const std::vector<Util::Vector>& _shapeB, std::vector<Util::Vector>& simplex) { Util::Vector dir(1,0,0); simplex.push_back((getFarPoint(_shapeA, dir) - getFarPoint(_shapeB, -dir))); dir = -dir; while(true){ float dotProduct = 0; simplex.push_back((getFarPoint(_shapeA, dir) - getFarPoint(_shapeB, -dir))); if(dotProd(simplex.back(), dir) <= 0) return false; else if (checkOrigin(simplex, dir)){ simplex.push_back((getFarPoint(_shapeA, dir) - getFarPoint(_shapeB, -dir))); return true; } } }
// Support function: returns the point of `shape` with the largest dot
// product with `dir`. Ties keep the earliest such point.
//
// The search is seeded with the first point instead of a distance of 0.
// Previously, when every point had a non-positive projection on `dir`,
// shape[0] was returned regardless of which point was actually farthest.
// The index is now an integer type (it used to be a float), and an empty
// shape yields (0,0,0) instead of indexing past the end.
Util::Vector SteerLib::GJK_EPA::getFarPoint(const std::vector<Util::Vector>& shape, const Util::Vector& dir)
{
	Util::Vector farPoint(0,0,0);
	if (shape.empty()) {
		return farPoint;
	}

	std::vector<Util::Vector>::size_type farIndex = 0;
	float farDistance = dotProd(shape[0], dir);

	for (std::vector<Util::Vector>::size_type i = 1; i < shape.size(); i++) {
		float checkFar = dotProd(shape[i], dir);
		if (checkFar > farDistance) {
			farDistance = checkFar;
			farIndex = i;
		}
	}

	farPoint[0] = shape[farIndex][0];
	farPoint[1] = shape[farIndex][1];
	farPoint[2] = shape[farIndex][2];
	return farPoint;
}
// puts result in 'ret' void Quaternion::slerp(Quaternion const &a, Quaternion b, float t, Quaternion &ret){ // reverse sign if dot prod < 0 if (dotProd(a, b) < 0) { b *= -1; } float angle = getAngle(a, b); float sc1, sc2; // like suggested in the book, for small angles we use the sin(a) = a appr. if (angle > 0.00001) { sc1 = sin( (1-t)*angle ) / sin( angle ); sc2 = sin( t*angle ) / sin( angle ); } else { sc1 = 1 - t; sc2 = t; } ret.w = sc1*a.w + sc2*b.w; ret.w = sc1*a.w + sc2*b.w; ret.x = sc1*a.x + sc2*b.x; ret.y = sc1*a.y + sc2*b.y; ret.z = sc1*a.z + sc2*b.z; }
// compute optimal step size along descent vector d relative to // a gradient related vector g // stepsize = ( g' d ) / ( d' A d ) double GradientProjection::computeStepSize( valarray<double> const & g, valarray<double> const & d) const { COLA_ASSERT(g.size()==d.size()); valarray<double> Ad; if(sparseQ) { Ad.resize(g.size()); sparseQ->rightMultiply(d,Ad); } double const numerator = dotProd(g, d); double denominator = 0; for (unsigned i=0; i<g.size(); i++) { double r = sparseQ ? Ad[i] : 0; if(i<denseSize) { for (unsigned j=0; j<denseSize; j++) { r += (*denseQ)[i*denseSize+j] * d[j]; } } denominator += r * d[i]; } if(denominator==0) { return 0; } return numerator/(2.*denominator); }
/* Function: penaltyForce
 * Description: Computes the penalty force between two points as the sum of
 *              a Hooke (spring) term and a damping term.
 * Input: p - Coordinates of first point
 *        pV - Velocity of first point
 *        I - Intersection point
 *        V - Velocity of Intersection point
 *        kH - K value for Hook's Law
 *        kD - K value for damping force
 * Output: Penalty force vector
 *
 * NOTE(review): `length` is never assigned by a statement in this function,
 * yet it is read right after each pNORMALIZE call. Presumably pNORMALIZE is
 * a macro that stores the pre-normalisation length in a local named
 * `length` - confirm against the macro definition. The statement order
 * below matters for that reason.
 */
point penaltyForce(point p, point pV, point I, point V, double kH, double kD)
{
	double mag, length, dot;
	point dist, hForce, dForce, pVel, vDiff, pForce;

	// Initialize force computation variables
	pDIFFERENCE(p, I, dist);     // dist  derived from p and I
	pDIFFERENCE(pV, V, vDiff);   // vDiff derived from pV and V
	dot = dotProd(vDiff, dist);  // taken before dist is normalized

	// Compute Hooks Force
	pNORMALIZE(dist);
	pMULTIPLY(dist, -(kH * length), hForce);

	// Compute Damping Forces
	// NOTE(review): mag saves `length` here but is not read afterwards; the
	// division below uses `length` after pNORMALIZE(pV) has run - confirm
	// which length the damping term is meant to use.
	mag = length;
	pNORMALIZE(pV);
	pMULTIPLY(pV, (kD * (dot/length)), dForce);

	// Compute Penalty Force
	pSUM(hForce, dForce, pForce);

	return pForce;
} //end penaltyForce
// Keeps particles inside a sphere of radius sphereRadius centred at the
// origin.
//
// For every particle whose displaced position pos + dv falls outside the
// sphere:
//   - dv is replaced so that pos + dv lands on the sphere surface along the
//     same direction,
//   - the velocity component along that direction is reduced by a factor of
//     1.8 (2 would be a perfect bounce, 1 a slip wall),
//   - the velocity is damped by 0.95 as friction.
void collideWithSphere(Particles& particles, real sphereRadius)
{
    Particles::Positions& pos = particles.pos_;
    Particles::Velocities& dv = particles.dv_;
    Particles::Velocities& vel = particles.vel_;

    const unsigned size = pos.size();

    // perfect bounce == 2, slip walls == 1
    const real bounce = 1.8;

    for (unsigned i = 0; i < size; ++i) {
        Real3 target = pos[i]+dv[i];

        // still inside (or on) the sphere: nothing to do
        if (!(target.sqrnorm() > sphereRadius*sphereRadius)) {
            continue;
        }

        Real3 outward = normalize(target);

        // clamp the displacement to the sphere surface
        dv[i] = sphereRadius*outward-pos[i];

        // bounce
        vel[i] += -bounce*dotProd(vel[i],outward)*outward;

        // friction
        vel[i] *= 0.95;
    }
}
/*!
 * @brief Classify objects according to their size and color.
 *
 * For every object in the list the weighted position sums are divided by
 * the object's weight. Objects lighter than thresholdWeight are marked as
 * too small; for all others a spot around the doubled position is
 * debayered, the resulting color is stored, and the object is classified by
 * testing that color against three separating planes.
 *
 * @param pImgRaw Pointer to the image captured from the camera. Used to measure the color of images.
 * @param pObj Pointer to the first object of the list of objects to be classified.
 * @param thresholdWeight Minimum weight of an object to be considered.
 * @param spotSize Size of the spot to debayer for measuring the color.
 */
void classifyObjects(uint8 const * const pImgRaw, struct object * const pObj, uint32 const thresholdWeight, t_index const spotSize)
{
	/* Local helper: dot product of a 3-byte color with a 3-entry plane. */
	inline int32 dotProd(uint8 const * const vec1, int32 const * const vec2)
	{
		return vec1[0] * vec2[0] + vec1[1] * vec2[1] + vec1[2] * vec2[2];
	}

	struct object * obj;

	for (obj = pObj; obj != NULL; obj = obj->pNext)
	{
		/* Turn the accumulated sums into a position; this happens before the
		 * weight is compared with the threshold. */
		obj->posWghtX /= obj->weight;
		obj->posWghtY /= obj->weight;

		if (obj->weight < thresholdWeight)
		{
			obj->classification = e_classification_tooSmall;
		}
		else
		{
			uint8 color[3];
			int16 posX, posY;
			int32 planes[3][3] = {
				{ -3288, 6429, -4160 },      /* Between green and yellow and orange and red. */
				{ -141, 7330, -7662 },       /* Between green and yellow. */
				{ -782105, 575153, -64151 }  /* Between orange and red. */
			};

			posX = 2 * obj->posWghtX - spotSize / 2;
			posY = 2 * obj->posWghtY - spotSize / 2;

			/* Move the spot inside the picture. */
			if (posX < 0)
			{
				posX = 0;
			}
			else if (posX + spotSize >= WIDTH_CAPTURE)
			{
				posX = WIDTH_CAPTURE - spotSize;
			}

			if (posY < 0)
			{
				posY = 0;
			}
			else if (posY + spotSize >= HEIGHT_CAPTURE)
			{
				posY = HEIGHT_CAPTURE - spotSize;
			}

			OscVisDebayerSpot(pImgRaw, WIDTH_CAPTURE, HEIGHT_CAPTURE, data.enBayerOrder, posX, posY, spotSize, color);

			/* color[] is stored with index 2 as red, 1 as green and 0 as blue. */
			obj->color.red = color[2];
			obj->color.green = color[1];
			obj->color.blue = color[0];

			/* The first plane splits {green, yellow} from {orange, red}; the
			 * second and third plane split each pair. */
			if (dotProd(color, planes[0]) > 255)
			{
				if (dotProd(color, planes[1]) > 255)
				{
					obj->classification = e_classification_sugusGreen;
				}
				else
				{
					obj->classification = e_classification_sugusYellow;
				}
			}
			else if (dotProd(color, planes[2]) > 255)
			{
				obj->classification = e_classification_sugusOrange;
			}
			else
			{
				obj->classification = e_classification_sugusRed;
			}
		}
	}
}
/*!
 * Modified Gram-Schmidt step: orthogonalizes w[i+1] against w[0..i], stores
 * the projection coefficients in column i of Hsbg (rows 0..i), stores the
 * norm of the remainder in Hsbg[i+1][i] and scales w[i+1] by that norm.
 *
 * If dotProd(w[i+1], w[i+1]) is <= 0 or NaN on entry, the run is terminated:
 * exit(EXIT_DIVERGENCE) without MPI, MPI_Abort with MPI. With MPI the
 * decision is first combined over all ranks (Reduce to the master node,
 * then Bcast of the verdict).
 *
 * NOTE(review): the check of the final norm is commented out below, so
 * w[i+1] is divided by nrm without testing nrm for zero.
 */
void CSysSolve::ModGramSchmidt(int i, vector<vector<su2double> > & Hsbg, vector<CSysVector> & w) {

  bool Convergence = true;
  int rank = MASTER_NODE;

#ifdef HAVE_MPI
  int size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);
#endif

  /*--- Parameter for reorthonormalization ---*/

  static const su2double reorth = 0.98;

  /*--- Get the norm of the vector being orthogonalized, and find the
  threshold for re-orthogonalization ---*/

  su2double nrm = dotProd(w[i+1], w[i+1]);
  su2double thr = nrm*reorth;

  /*--- The norm of w[i+1] < 0.0 or w[i+1] = NaN ---*/

  if ((nrm <= 0.0) || (nrm != nrm)) Convergence = false;

  /*--- Synchronization point to check the convergence of the solver ---*/

#ifdef HAVE_MPI

  unsigned short *sbuf_conv = NULL, *rbuf_conv = NULL;
  sbuf_conv = new unsigned short[1]; sbuf_conv[0] = 0;
  rbuf_conv = new unsigned short[1]; rbuf_conv[0] = 0;

  /*--- Convergence criteria ---*/

  /*--- Each rank contributes 1 when its local check passed; the sum is
  collected on the master node ---*/

  sbuf_conv[0] = Convergence;
  SU2_MPI::Reduce(sbuf_conv, rbuf_conv, 1, MPI_UNSIGNED_SHORT, MPI_SUM, MASTER_NODE, MPI_COMM_WORLD);

  /*-- Compute global convergence criteria in the master node --*/

  /*--- The global verdict is "converged" only when the sum equals the
  number of ranks ---*/

  sbuf_conv[0] = 0;
  if (rank == MASTER_NODE) {
    if (rbuf_conv[0] == size) sbuf_conv[0] = 1;
    else sbuf_conv[0] = 0;
  }

  SU2_MPI::Bcast(sbuf_conv, 1, MPI_UNSIGNED_SHORT, MASTER_NODE, MPI_COMM_WORLD);

  if (sbuf_conv[0] == 1) Convergence = true;
  else Convergence = false;

  delete [] sbuf_conv;
  delete [] rbuf_conv;

#endif

  if (!Convergence) {
    if (rank == MASTER_NODE)
      cout << "\n !!! Error: SU2 has diverged. Now exiting... !!! \n" << endl;
#ifndef HAVE_MPI
    exit(EXIT_DIVERGENCE);
#else
    MPI_Abort(MPI_COMM_WORLD,1);
#endif
  }

  /*--- Begin main Gram-Schmidt loop ---*/

  for (int k = 0; k < i+1; k++) {
    su2double prod = dotProd(w[i+1], w[k]);
    Hsbg[k][i] = prod;
    w[i+1].Plus_AX(-prod, w[k]);

    /*--- Check if reorthogonalization is necessary ---*/

    if (prod*prod > thr) {
      prod = dotProd(w[i+1], w[k]);
      Hsbg[k][i] += prod;
      w[i+1].Plus_AX(-prod, w[k]);
    }

    /*--- Update the norm and check its size ---*/

    nrm -= Hsbg[k][i]*Hsbg[k][i];
    if (nrm < 0.0) nrm = 0.0;
    thr = nrm*reorth;
  }

  /*--- Test the resulting vector ---*/

  nrm = w[i+1].norm();
  Hsbg[i+1][i] = nrm;

  // if (nrm <= 0.0) {
  //
  //   /*--- w[i+1] is a linear combination of the w[0:i] ---*/
  //
  //   cerr << "The FGMRES linear solver has diverged" << endl;
  //#ifndef HAVE_MPI
  //   exit(EXIT_DIVERGENCE);
  //#else
  //   MPI_Abort(MPI_COMM_WORLD,1);
  //   MPI_Finalize();
  //#endif
  //
  // }

  /*--- Scale the resulting vector ---*/

  w[i+1] /= nrm;

}
/*!
 * Preconditioned BiCGSTAB iteration for the system whose operator is
 * applied by mat_vec and whose right-hand side is b.
 *
 * \param b          right-hand side
 * \param x          initial guess on entry, updated in place
 * \param mat_vec    applies the system operator: mat_vec(in, out)
 * \param precond    applies the preconditioner: precond(in, out)
 * \param tol        relative tolerance on the residual norm
 * \param m          maximum number of iterations (must be >= 1)
 * \param residual   receives the last residual norm (see note below)
 * \param monitoring when true, the master rank prints header/history lines
 * \return the loop index at exit, or 0 when the initial guess already
 *         satisfies the tolerance
 *
 * NOTE(review): on the early "solved by initial guess" return, *residual is
 * not written. `eps` is not declared in this function; it comes from the
 * enclosing scope.
 */
unsigned long CSysSolve::BCGSTAB_LinSolver(const CSysVector & b, CSysVector & x, CMatrixVectorProduct & mat_vec,
                                           CPreconditioner & precond, su2double tol, unsigned long m, su2double *residual, bool monitoring) {

  int rank = 0;
#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif

  /*--- Check the subspace size ---*/

  if (m < 1) {
    if (rank == MASTER_NODE) cerr << "CSysSolve::BCGSTAB: illegal value for subspace size, m = " << m << endl;
#ifndef HAVE_MPI
    exit(EXIT_FAILURE);
#else
    MPI_Abort(MPI_COMM_WORLD,1);
    MPI_Finalize();
#endif
  }

  /*--- All work vectors start as copies of b ---*/

  CSysVector r(b);
  CSysVector r_0(b);
  CSysVector p(b);
  CSysVector v(b);
  CSysVector s(b);
  CSysVector t(b);
  CSysVector phat(b);
  CSysVector shat(b);
  CSysVector A_x(b);

  /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/

  mat_vec(x, A_x);
  r -= A_x; r_0 = r; // recall, r holds b initially
  su2double norm_r = r.norm();
  su2double norm0 = b.norm();
  if ( (norm_r < tol*norm0) || (norm_r < eps) ) {
    if (rank == MASTER_NODE) cout << "CSysSolve::BCGSTAB(): system solved by initial guess." << endl;
    return 0;
  }

  /*--- Initialization ---*/

  su2double alpha = 1.0, beta = 1.0, omega = 1.0, rho = 1.0, rho_prime = 1.0;

  /*--- Set the norm to the initial initial residual value ---*/

  norm0 = norm_r;

  /*--- Output header information including initial residual ---*/

  int i = 0;
  if ((monitoring) && (rank == MASTER_NODE)) {
    WriteHeader("BCGSTAB", tol, norm_r);
    WriteHistory(i, norm_r, norm0);
  }

  /*--- Loop over all search directions ---*/

  for (i = 0; i < (int)m; i++) {

    /*--- Compute rho_prime ---*/

    rho_prime = rho;

    /*--- Compute rho_i ---*/

    rho = dotProd(r, r_0);

    /*--- Compute beta ---*/

    beta = (rho / rho_prime) * (alpha /omega);

    /*--- p_{i} = r_{i-1} + beta * p_{i-1} - beta * omega * v_{i-1} ---*/

    /*--- On the first pass p and v both still equal b and omega is 1, so
    the two beta terms cancel and p becomes r ---*/

    su2double beta_omega = -beta*omega;
    p.Equals_AX_Plus_BY(beta, p, beta_omega, v);
    p.Plus_AX(1.0, r);

    /*--- Preconditioning step ---*/

    precond(p, phat);
    mat_vec(phat, v);

    /*--- Calculate step-length alpha ---*/

    su2double r_0_v = dotProd(r_0, v);
    alpha = rho / r_0_v;

    /*--- s_{i} = r_{i-1} - alpha * v_{i} ---*/

    s.Equals_AX_Plus_BY(1.0, r, -alpha, v);

    /*--- Preconditioning step ---*/

    precond(s, shat);
    mat_vec(shat, t);

    /*--- Calculate step-length omega ---*/

    omega = dotProd(t, s) / dotProd(t, t);

    /*--- Update solution and residual: ---*/

    x.Plus_AX(alpha, phat); x.Plus_AX(omega, shat);
    r.Equals_AX_Plus_BY(1.0, s, -omega, t);

    /*--- Check if solution has converged, else output the relative residual if necessary ---*/

    norm_r = r.norm();
    if (norm_r < tol*norm0) break;
    if (((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 50 == 0) && (rank == MASTER_NODE)) WriteHistory(i+1, norm_r, norm0);

  }

  if ((monitoring) && (rank == MASTER_NODE)) {
    cout << "# BCGSTAB final (true) residual:" << endl;
    cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl;
  }

  // /*--- Recalculate final residual (this should be optional) ---*/
  // mat_vec(x, A_x);
  // r = b; r -= A_x;
  // su2double true_res = r.norm();
  //
  // if ((fabs(true_res - norm_r) > tol*10.0) && (rank == MASTER_NODE)) {
  //   cout << "# WARNING in CSysSolve::BCGSTAB(): " << endl;
  //   cout << "# true residual norm and calculated residual norm do not agree." << endl;
  //   cout << "# true_res - calc_res = " << true_res <<" "<< norm_r << endl;
  // }

  /*--- Report the recursively updated residual norm to the caller ---*/

  (*residual) = norm_r;
  return i;
}
/*!
 * Preconditioned conjugate-gradient iteration for the system whose operator
 * is applied by mat_vec and whose right-hand side is b.
 *
 * \param b          right-hand side
 * \param x          initial guess on entry, updated in place
 * \param mat_vec    applies the system operator: mat_vec(in, out)
 * \param precond    applies the preconditioner: precond(in, out)
 * \param tol        relative tolerance on the residual norm
 * \param m          maximum number of iterations (must be >= 1)
 * \param monitoring when true, the master rank prints header/history lines
 * \return the loop index at exit, or 0 when the initial guess already
 *         satisfies the tolerance
 *
 * NOTE(review): `eps` is not declared in this function; it comes from the
 * enclosing scope.
 */
unsigned long CSysSolve::CG_LinSolver(const CSysVector & b, CSysVector & x, CMatrixVectorProduct & mat_vec,
                                      CPreconditioner & precond, su2double tol, unsigned long m, bool monitoring) {

  int rank = 0;
#ifdef HAVE_MPI
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
#endif

  /*--- Check the subspace size ---*/

  if (m < 1) {
    if (rank == MASTER_NODE) cerr << "CSysSolve::ConjugateGradient: illegal value for subspace size, m = " << m << endl;
#ifndef HAVE_MPI
    exit(EXIT_FAILURE);
#else
    MPI_Abort(MPI_COMM_WORLD,1);
    MPI_Finalize();
#endif
  }

  CSysVector r(b);
  CSysVector A_p(b);

  /*--- Calculate the initial residual, compute norm, and check if system is already solved ---*/

  mat_vec(x, A_p);
  r -= A_p; // recall, r holds b initially
  su2double norm_r = r.norm();
  su2double norm0 = b.norm();
  if ( (norm_r < tol*norm0) || (norm_r < eps) ) {
    if (rank == MASTER_NODE) cout << "CSysSolve::ConjugateGradient(): system solved by initial guess." << endl;
    return 0;
  }

  /*--- z holds the preconditioned residual; it is the first search direction ---*/

  su2double alpha, beta, r_dot_z;
  CSysVector z(r);
  precond(r, z);
  CSysVector p(z);

  /*--- Set the norm to the initial initial residual value ---*/

  norm0 = norm_r;

  /*--- Output header information including initial residual ---*/

  int i = 0;
  if ((monitoring) && (rank == MASTER_NODE)) {
    WriteHeader("CG", tol, norm_r);
    WriteHistory(i, norm_r, norm0);
  }

  /*--- Loop over all search directions ---*/

  for (i = 0; i < (int)m; i++) {

    /*--- Apply matrix to p to build Krylov subspace ---*/

    mat_vec(p, A_p);

    /*--- Calculate step-length alpha ---*/

    r_dot_z = dotProd(r, z);
    alpha = dotProd(A_p, p);
    alpha = r_dot_z / alpha;

    /*--- Update solution and residual: ---*/

    x.Plus_AX(alpha, p);
    r.Plus_AX(-alpha, A_p);

    /*--- Check if solution has converged, else output the relative residual if necessary ---*/

    norm_r = r.norm();
    if (norm_r < tol*norm0) break;
    if (((monitoring) && (rank == MASTER_NODE)) && ((i+1) % 5 == 0)) WriteHistory(i+1, norm_r, norm0);

    precond(r, z);

    /*--- Calculate Gram-Schmidt coefficient beta,
    beta = dotProd(r_{i+1}, z_{i+1}) / dotProd(r_{i}, z_{i}) ---*/

    /*--- r_dot_z still holds the previous product when the reciprocal is
    taken, and the new product afterwards ---*/

    beta = 1.0 / r_dot_z;
    r_dot_z = dotProd(r, z);
    beta *= r_dot_z;

    /*--- Gram-Schmidt orthogonalization; p = beta *p + z ---*/

    p.Equals_AX_Plus_BY(beta, p, 1.0, z);
  }

  if ((monitoring) && (rank == MASTER_NODE)) {
    cout << "# Conjugate Gradient final (true) residual:" << endl;
    cout << "# Iteration = " << i << ": |res|/|res0| = " << norm_r/norm0 << ".\n" << endl;
  }

  // /*--- Recalculate final residual (this should be optional) ---*/
  // mat_vec(x, A_p);
  // r = b;
  // r -= A_p;
  // su2double true_res = r.norm();
  //
  // if (fabs(true_res - norm_r) > tol*10.0) {
  //   if (rank == MASTER_NODE) {
  //     cout << "# WARNING in CSysSolve::ConjugateGradient(): " << endl;
  //     cout << "# true residual norm and calculated residual norm do not agree." << endl;
  //     cout << "# true_res - calc_res = " << true_res - norm_r << endl;
  //   }
  // }

  return i;
}
/* -------------------------------------
 * FindH
 * Two-loop L-BFGS recursion in which the middle step is an inner
 * conjugate-gradient iteration on Hessian-vector products.
 * IN
 * func:  Function to be minimized.
 * x:     Current point (nRow entries).
 * s, y:  Stored update pairs; entries 0 .. min(k, m) - 1 are read.
 * nRow:  Number of rows (length) of x.
 * m:     Maximum number of stored pairs.
 * k:     Current outer iteration count.
 * N_max: Maximum number of CG iterates.
 * OUT
 * Pointer to the resulting vector (nRow entries).
 *
 * Changes from the previous version:
 *  - the first recursion loop now also processes pair 0; it used to stop at
 *    i > 0, leaving alpha[0] uninitialised although the second loop reads it;
 *  - alpha and rho are sized by the number of stored pairs, which is what
 *    indexes them, rather than by nRow;
 *  - d'Bd is evaluated once per CG iterate instead of twice.
 *
 * NOTE(review): intermediate vectors returned by vSum/vProd/hessCentralDiff
 * are never released here; whether they are heap-allocated must be checked
 * against those helpers. When the CG loop runs N_max iterates without
 * breaking, r still points at the gradient computed at the top - confirm
 * that this fallback is intended.
 * -------------------------------------
 */
double * findHSLM(double (*func)(double*, int), double *x,
                  double** s, double** y, int nRow, int m, int k, int N_max){
  double *q, *r, *alpha, *rho, *Bd, *r_cg, *d, *z, *r_new;
  double beta, beta_cg, alpha_cg, epsilon, dBd;
  int i, state, j;

  // Calculate gradient.
  r = q = gradCentralDiff(func, x, nRow);

  // Number of stored pairs that take part in the recursion.
  state = min(k, m);

  // One coefficient per stored pair (at least one slot so the allocation
  // size is never zero).
  alpha = (double*) malloc((state > 0 ? state : 1) * sizeof(double));
  rho   = (double*) malloc((state > 0 ? state : 1) * sizeof(double));

  // Fill in rho
  for(i = 0; i < state; i++){
    rho[i] = 1 / (dotProd(y[i], s[i], nRow));
  }

  // First Loop: from the last stored pair down to and including pair 0.
  for(i = (state - 1); i >= 0; i--){
    alpha[i] = rho[i] * dotProd(s[i], q, nRow);
    q = vSum(q, vProd(y[i], -alpha[i], nRow), nRow);
  }

  /*
   * -----------------------------------
   * ########### CG Iteration ##########
   * -----------------------------------
   * Outputs: r
   */
  // Initialize: epsilon, d, r_cg, z
  epsilon = min(.5, sqrt(norm(q, nRow))) * norm(q, nRow);
  d    = vProd(q, 1, nRow);
  r_cg = vProd(q, 1, nRow);
  z    = vProd(q, 0, nRow);

  for(j = 0; j < N_max; j++){
    Bd  = hessCentralDiff(func, x, d, nRow);
    dBd = dotProd(d, Bd, nRow);

    // Stop when d'Bd <= 0, i.e. non-positive curvature along d.
    if(dBd <= 0){
      if(j == 0){
        r = d;
        break;
      }else{
        r = z;
        break;
      }
    }

    // alpha_j = rj'rj/d_j'Bd_j
    alpha_cg = dotProd(r_cg, r_cg, nRow) / dBd;

    // z_{j+1} = z_j + alpha_j*d_j
    z = vSum(z, vProd(d, alpha_cg, nRow), nRow);

    // r_{j+1} = r_j + alpha_j*B_kd_j
    r_new = vSum(r_cg, vProd(Bd, alpha_cg, nRow), nRow);
    if(norm(r_new, nRow) < epsilon){
      r = z;
      break;
    }

    // Update beta, d, r_cg.
    beta_cg = dotProd(r_new, r_new, nRow) / dotProd(r_cg, r_cg, nRow);
    d       = vSum(vProd(r_new, -1, nRow), vProd(d, beta_cg, nRow), nRow);
    r_cg    = r_new;
  }
  /* -----------------------------------
   * ######### CG Iteration End ########
   * -----------------------------------
   */

  // Second Loop
  for(i = 0; i < state; i ++){
    beta = rho[i] * dotProd(y[i], r, nRow);
    r = vSum(r, vProd(s[i], (alpha[i] - beta), nRow), nRow);
  }

  // Memory release.
  free(alpha);
  free(rho);

  // Return result.
  return r;
}
/* ------------------------------------- * LBFGS * IN * func: Function to be minimized. * nRow: Number of rows (length) of x. * N_max: Maximum number of CG iterates. * TOL: Minimum size for gradient of f. * OUT * x: Local minimum of func. * ------------------------------------- */ double * SLM_LBFGS(double (* func)(double*, int), int nRow, int m, double TOL, int N_max, int verbose){ // Variable declaration. double **s, **y; double *x, *grad, *p, *x_new, *grad_new; double alpha, norm_grad0; int i, k, MAX_ITER, exploredDataPoints; // Space allocation. x = (double *)malloc(nRow * sizeof(double)); s = (double **)malloc((nRow*m) * sizeof(double)); y = (double **)malloc((nRow*m) * sizeof(double)); // Initialize x. for(i = 0; i < nRow; i++){ x[i] = ((double) rand() / INT_MAX) ; } // Stochastic Mode if(stocMode){ exploredDataPoints = 0; //printf("\nRUNNING STOCASTIC MODE\n"); SAMPLE = rand() % (int)(MAX_FILE_ROWS * sampProp); create_sample(0); exploredDataPoints += SAMPLE; } // Until Convergence or MAX_ITER. MAX_ITER = 6e6; grad = gradCentralDiff(func, x, nRow); // Update s, y. k = 0; // Initial norm of gradient. norm_grad0 = norm(grad, nRow); while(norm(grad, nRow) > TOL*(1 + norm_grad0) && ((run_logistic*exploredDataPoints + ((1 - run_logistic)*k)) < MAX_ITER)){ if(stocMode){ printf("\nRUNNING STOCASTIC MODE\n"); SAMPLE = rand() % (int)(MAX_FILE_ROWS * sampProp); create_sample(k); exploredDataPoints += SAMPLE; } // p = -Hgrad(f) if(k > 0){ p = vProd(findHSLM(func, x, s, y, nRow, m, k, N_max), -1, nRow); }else{ p = vProd(grad, -1, nRow); } // Alpha that statifies Wolfe conditions. alpha = backTrack(func, x, p, nRow, verbose); x_new = vSum(x, vProd(p, alpha, nRow), nRow); //imprimeTit("X_NEW"); //imprimeMatriz(x_new, 1, nRow); grad_new = gradCentralDiff(func, x_new, nRow); //imprimeTit("GRAD_NEW"); //imprimeMatriz(grad_new, 1, nRow); // Update s, y. 
updateSY(s, y, vProd(p, alpha, nRow), vSum(grad_new, vProd(grad, -1, nRow), nRow), m, k); // ---------------- PRINT ------------------- // if(verbose){ if(stocMode){ printf("\n ITER = %d; f(x) = %.10e ; " "||x|| = %.10e ; ||grad|| = %.10e ; " "||p|| = %.10e ; sTy = %.10e ; " "alpha = %.10e; explored data points = %d; precision = %fl ", k, func(x, nRow), norm(x, nRow), norm(grad, nRow), norm(p, nRow), dotProd(s[(int)min(k , (m - 1))], y[(int)min(k , (m - 1))], nRow), alpha, exploredDataPoints, class_precision(x, nRow, 0)); }else{ printf("\n ITER = %d; f(x) = %.10e ; " "||x|| = %.10e ; ||grad|| = %.10e ; " "||p|| = %.10e ; sTy = %.10e ; " "alpha = %.10e", k, func(x, nRow), norm(x, nRow), norm(grad, nRow), norm(p, nRow), dotProd(s[(int)min(k , (m - 1))], y[(int)min(k , (m - 1))], nRow), alpha); } } // ---------------- PRINT ------------------- //y // Update k, x, grad. x = x_new; grad = grad_new; k = k + 1; } free(grad); free(s); free(y); return x; }
void ModelMFWt::hogTrain(const Data &data, Model &bestModel, std::unordered_set<int>& invalidUsers, std::unordered_set<int>& invalidItems) { //copy passed known factors //uFac = data.origUFac; //iFac = data.origIFac; std::cout << "\nModelMFWt::hogTrain trainSeed: " << trainSeed; int nnz = data.trainNNZ; std::cout << "\nObj b4 svd: " << objective(data, invalidUsers, invalidItems) << " Train RMSE: " << RMSE(data.trainMat) << " Train nnz: " << nnz << std::endl; std::chrono::time_point<std::chrono::system_clock> startSVD, endSVD; startSVD = std::chrono::system_clock::now(); //initialization with svd of the passed matrix //svdFrmSvdlibCSR(data.trainMat, facDim, uFac, iFac, false); endSVD = std::chrono::system_clock::now(); std::chrono::duration<double> durationSVD = (endSVD - startSVD) ; std::cout << "\nsvd duration: " << durationSVD.count(); int iter, bestIter = -1; double bestObj, prevObj; double bestValRMSE, prevValRMSE; gk_csr_t *trainMat = data.trainMat; //vector to hold user gradient accumulation std::vector<std::vector<double>> uGradsAcc (nUsers, std::vector<double>(facDim,0)); //vector to hold item gradient accumulation std::vector<std::vector<double>> iGradsAcc (nItems, std::vector<double>(facDim,0)); //std::cout << "\nNNZ = " << nnz; prevObj = objective(data, invalidUsers, invalidItems); bestObj = prevObj; std::cout << "\nObj aftr svd: " << prevObj << " Train RMSE: " << RMSE(data.trainMat); std::chrono::time_point<std::chrono::system_clock> start, end; std::chrono::duration<double> duration; std::vector<std::unordered_set<int>> uISet(nUsers); genStats(trainMat, uISet, std::to_string(trainSeed)); getInvalidUsersItems(trainMat, uISet, invalidUsers, invalidItems); std::unordered_set<int> headItems = getHeadItems(trainMat, 0.5); std::unordered_set<int> headUsers = getHeadItems(trainMat, 0.5); double lambda0 = 0.8; double lambda1 = 1.0 - lambda0; //random engine std::mt19937 mt(trainSeed); //get user-item ratings from training data auto uiRatings = 
getUIRatings(trainMat, invalidUsers, invalidItems); //index to above uiRatings pair std::vector<size_t> uiRatingInds(uiRatings.size()); std::iota(uiRatingInds.begin(), uiRatingInds.end(), 0); std::cout << "\nTrain NNZ after removing invalid users and items: " << uiRatings.size() << std::endl; double subIterDuration = 0; for (iter = 0; iter < maxIter; iter++) { //shuffle the user item rating indexes std::shuffle(uiRatingInds.begin(), uiRatingInds.end(), mt); start = std::chrono::system_clock::now(); const int indsSz = uiRatingInds.size(); #pragma omp parallel for for (int k = 0; k < indsSz; k++) { auto ind = uiRatingInds[k]; //get user, item and rating int u = std::get<0>(uiRatings[ind]); int item = std::get<1>(uiRatings[ind]); float itemRat = std::get<2>(uiRatings[ind]); double r_ui_est = dotProd(uFac[u], iFac[item], facDim); double diff = itemRat - r_ui_est; if (headItems.find(item) != headItems.end()) { diff = diff*lambda0; } else { diff = diff*(lambda0 + lambda1); } //update user for (int i = 0; i < facDim; i++) { uFac[u][i] -= learnRate*(-2.0*diff*iFac[item][i] + 2.0*uReg*uFac[u][i]); } r_ui_est = dotProd(uFac[u], iFac[item], facDim); diff = itemRat - r_ui_est; if (headItems.find(item) != headItems.end()) { diff = diff*lambda0; } else { diff = diff*(lambda0 + lambda1); } //update item for (int i = 0; i < facDim; i++) { iFac[item][i] -= learnRate*(-2.0*diff*uFac[u][i] + 2.0*iReg*iFac[item][i]); } } end = std::chrono::system_clock::now(); duration = end - start; subIterDuration = duration.count(); //check objective if (iter % OBJ_ITER == 0 || iter == maxIter-1) { if (isTerminateModel(bestModel, data, iter, bestIter, bestObj, prevObj, invalidUsers, invalidItems)) { break; } if (iter % 50 == 0) { std::cout << "ModelMFWt::train trainSeed: " << trainSeed << " Iter: " << iter << " Objective: " << std::scientific << prevObj << " Train RMSE: " << RMSE(data.trainMat, invalidUsers, invalidItems) << " Val RMSE: " << prevValRMSE << " sub duration: " << subIterDuration << 
std::endl; } if (iter % 500 == 0 || iter == maxIter - 1) { std::string modelFName = std::string(data.prefix); bestModel.saveFacs(modelFName); } } } //save best model found till now std::string modelFName = std::string(data.prefix); bestModel.saveFacs(modelFName); std::cout << "\nBest model validation RMSE: " << bestModel.RMSE(data.valMat, invalidUsers, invalidItems); }