/**
 * Collect the A_ii field of every in-memory node into one dense vector.
 *
 * Only meaningful for square problems, which is asserted. The `mat`
 * argument is not read by this implementation.
 *
 * @return vector with info.total() entries; entry i is latent_factors_inmem[i].A_ii.
 */
vec diag(DistMat & mat){
  assert(info.is_square());
  const int num_nodes = info.total();
  vec diagonal = zeros(num_nodes);
  for (int node = 0; node < num_nodes; ++node)
    diagonal[node] = latent_factors_inmem[node].A_ii;
  return diagonal;
}
/**
 * Size and initialise the per-node work vectors used by the Lanczos solver.
 *
 * Sets the globals data_size and actual_vector_len to nsv + nv + 1 + max_iter,
 * assigns zeros(actual_vector_len) to the pvec of every node, and logs how
 * many megabytes of doubles that amounts to.
 *
 * @param info graph descriptor; info.total() is the number of nodes touched.
 */
void init_lanczos(bipartite_graph_descriptor & info){
  data_size = nsv + nv+1 + max_iter;
  actual_vector_len = data_size;

  // Every iteration writes a different node, so the loop is run in parallel.
#pragma omp parallel for
  for (int node = 0; node < info.total(); node++){
    latent_factors_inmem[node].pvec = zeros(actual_vector_len);
  }

  const double total_megabytes = (double)actual_vector_len * info.total() * sizeof(double) / 1e6;
  logstream(LOG_INFO)<<"Allocated a total of: " << total_megabytes << " MB for storing vectors." << std::endl;
}
/**
 * Placeholder for extracting the matrix diagonal (not implemented yet).
 *
 * Asserts that the problem is square and then trips assert(false) for the
 * first node it visits. When asserts are compiled out, the untouched
 * zeros(info.total()) vector is returned. The `mat` argument is not read.
 */
vec diag(DistMat & mat){
  assert(info.is_square());
  const int num_nodes = info.total();
  vec diagonal = zeros(num_nodes);
  for (int node = 0; node < num_nodes; ++node){
    // TODO: diagonal[node] = pgraph->vertex_data(node).A_ii;
    assert(false);
  }
  return diagonal;
}
/** * Vertex update function. */ void update(graphchi_vertex<VertexDataType, EdgeDataType> &vertex, graphchi_context &gcontext) { if (vertex.id() < (uint)mi.start || vertex.id() >= (uint)mi.end) return; vertex_data& user = latent_factors_inmem[vertex.id()]; bool rows = vertex.id() < (uint)info.get_start_node(false); if (info.is_square()) rows = mi.A_transpose; (void) rows; // unused assert(mi.r_offset >=0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; double val = 0; assert(mi.x_offset >=0 || mi.y_offset>=0); /*** COMPUTE r = c*A*x ********/ if (mi.A_offset && mi.x_offset >= 0){ for(int e=0; e < vertex.num_edges(); e++) { const edge_data & edge = vertex.edge(e)->get_data(); const vertex_data & movie = latent_factors_inmem[vertex.edge(e)->vertex_id()]; val += (edge.weight * movie.pvec[mi.x_offset]); } if (info.is_square() && mi.use_diag)// add the diagonal term val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } assert(mi.r_offset>=0 && mi.r_offset < user.pvec.size()); user.pvec[mi.r_offset] = val; } //end update
/**
 * Evaluate the pending matrix expression recorded in `mi` and store the
 * result in this vector.
 *
 * The result slot is this vector's offset. The node range comes from `info`,
 * selected by the matrix's transpose flag, which this vector adopts. The
 * work itself is one call to pengine->run(program, 1) on the shared engine.
 * Afterwards the offsets in `mi` are reset and mat.transpose is cleared.
 */
DistVec& DistVec::operator=(DistMat &mat){
  assert(prev_offset < data_size);
  mi.r_offset = offset;
  mi.prev_offset = prev_offset;

  // Adopt the orientation of the matrix and derive the node range from it.
  transpose = mat.transpose;
  const bool use_rows = !transpose;
  mi.start = info.get_start_node(use_rows);
  mi.end = info.get_end_node(use_rows);

  pengine->run(program, 1);
  debug_print(name);

  // Leave the shared state clean for the next expression.
  mi.reset_offsets();
  mat.transpose = false;
  return *this;
}
/**
 * Assign the value of a vector expression to this vector.
 *
 * If no x or y operand has been recorded in `mi` (both offsets are -1),
 * `vec` itself becomes the y operand. A zero coefficient d is replaced by
 * 1.0. This vector takes over vec's transpose flag and node range. One pass
 * of the Axb program is then run on a locally constructed engine, after
 * which the offsets in `mi` are reset.
 */
DistVec& operator=(const DistVec & vec){
  assert(offset < (info.is_square() ? 2*data_size: data_size));

  const bool no_operand_recorded = (mi.x_offset == -1) && (mi.y_offset == -1);
  if (no_operand_recorded)
    mi.y_offset = vec.offset;

  mi.r_offset = offset;
  assert(prev_offset < data_size);
  mi.prev_offset = prev_offset;
  if (mi.d == 0.0)
    mi.d = 1.0;

  // Mirror the source vector's orientation and range, here and in mi.
  transpose = vec.transpose;
  end = vec.end;
  start = vec.start;
  mi.start = vec.start;
  mi.end = vec.end;

  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
  engine.set_disable_vertexdata_storage();
  engine.set_modifies_inedges(false);
  engine.set_modifies_outedges(false);
  Axb program;
  engine.run(program, 1);

  debug_print(name);
  mi.reset_offsets();
  return *this;
}
/**
 * Copy a dense vector into this distributed vector's slot.
 *
 * pvec must have as many entries as one of the two sides of the graph
 * (asserted). The transpose flag is set when the graph is not square and
 * the length matches info.num_nodes(false), and is cleared otherwise.
 * Entry j of pvec is written to node start + j at position `offset`.
 */
DistVec& operator=(const vec & pvec){
  assert(offset >= 0);
  assert(pvec.size() == info.num_nodes(true) || pvec.size() == info.num_nodes(false));
  assert(start < end);

  transpose = !info.is_square() && pvec.size() == info.num_nodes(false);

  int src = 0;
  for (int node = start; node < end; ++node, ++src){
    latent_factors_inmem[node].pvec[offset] = pvec[src];
  }

  debug_print(name);
  return *this;
}
/**
 * Evaluate the pending matrix expression recorded in `mi` and store the
 * result in this vector.
 *
 * The result slot is this vector's offset. The node range comes from `info`,
 * selected by the matrix's transpose flag, which this vector adopts. One
 * pass of the Axb program is run on a locally constructed engine. Afterwards
 * the offsets in `mi` are reset and mat.transpose is cleared.
 */
DistVec& DistVec::operator=(DistMat &mat){
  assert(prev_offset < data_size);
  mi.r_offset = offset;
  mi.prev_offset = prev_offset;

  // Adopt the orientation of the matrix and derive the node range from it.
  transpose = mat.transpose;
  const bool use_rows = !transpose;
  mi.start = info.get_start_node(use_rows);
  mi.end = info.get_end_node(use_rows);

  graphchi_engine<VertexDataType, EdgeDataType> engine(training, nshards, false, m);
  engine.set_disable_vertexdata_storage();
  engine.set_modifies_inedges(false);
  engine.set_modifies_outedges(false);
  Axb program;
  engine.run(program, 1);

  debug_print(name);
  mi.reset_offsets();
  mat.transpose = false;
  return *this;
}
/*
 * Gather step: contribution of one edge to the product A*x.
 *
 * Edges whose role is PREDICT contribute nothing. Otherwise, when a matrix
 * product is requested (mi.A_offset set and mi.x_offset >= 0), the edge
 * contributes obs * x, where x is read from the target vertex when working
 * on rows and from the source vertex otherwise. In every other case the
 * contribution is 0.
 */
double gather(icontext_type& context, const vertex_type& vertex, edge_type& edge) const {
  if (edge.data().role == edge_data::PREDICT)
    return 0;

  // On square graphs the side is chosen by the transpose flag, not the id.
  bool brows = vertex.id() < (uint)info.get_start_node(false);
  if (info.is_square())
    brows = !mi.A_transpose;

  if (!mi.A_offset || mi.x_offset < 0)
    return 0;

  double contribution;
  if (brows)
    contribution = edge.data().obs * edge.target().data().pvec[mi.x_offset];
  else
    contribution = edge.data().obs * edge.source().data().pvec[mi.x_offset];
  return contribution;
}
/**
 * Evaluate the pending matrix expression recorded in `mi` and store the
 * result in this vector.
 *
 * While start_engine() runs, this vector is published through `pcurrent`
 * and its start/end are temporarily replaced by the range computed for the
 * matrix orientation. The original range is restored afterwards. The run
 * is bracketed by the Axbtrace tracepoint. Finally the offsets in `mi` are
 * reset and mat.transpose is cleared.
 */
DistVec& DistVec::operator=(DistMat &mat){
  assert(prev_offset < data_size);
  mi.r_offset = offset;
  mi.prev_offset = prev_offset;

  transpose = mat.transpose;
  const bool use_rows = !transpose;
  mi.start = info.get_start_node(use_rows);
  mi.end = info.get_end_node(use_rows);

  INITIALIZE_TRACER(Axbtrace, "Axb update function");
  BEGIN_TRACEPOINT(Axbtrace);
  pcurrent = this;

  // Borrow the matrix range for the duration of the engine run only.
  const int saved_start = start;
  const int saved_end = end;
  start = mi.start;
  end = mi.end;
  start_engine();
  start = saved_start;
  end = saved_end;
  END_TRACEPOINT(Axbtrace);

  debug_print(name);
  mi.reset_offsets();
  mat.transpose = false;
  return *this;
}
DistSlicedMat(int _start_offset, int _end_offset, bool _transpose, const bipartite_graph_descriptor &_info, std::string _name){ //assert(_start_offset < _end_offset); assert(_start_offset >= 0); assert(_info.total() > 0); transpose = _transpose; info = _info; init(); start_offset = _start_offset; end_offset = _end_offset; name = _name; }
void multiply(DistSlicedMat & mat, int curoffset, double a){ assert(a>0); DistVec current = mat[curoffset]; assert(mat.start_offset <= current.offset); vec result = zeros(curoffset); if (curoffset > 0){ #pragma omp parallel for for (int i=mat.start_offset; i< current.offset; i++){ for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ result[i-mat.start_offset] += latent_factors_inmem[k].pvec[i] * latent_factors_inmem[k].pvec[current.offset]; } } #pragma omp parallel for for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ latent_factors_inmem[k].pvec[curoffset] /= a; } for (int i=mat.start_offset; i< current.offset; i++){ #pragma omp parallel for for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ latent_factors_inmem[k].pvec[current.offset] -= result[i-mat.start_offset]/a * latent_factors_inmem[k].pvec[i]; } } } current.debug_print(current.name); }
void orthogonalize_vs_all(DistSlicedMat & mat, int curoffset, double &alpha){ assert(mi.ortho_repeats >=1 && mi.ortho_repeats <= 3); bool old_debug = debug; debug = false; DistVec current = mat[curoffset]; assert(mat.start_offset <= current.offset); double * alphas = new double[curoffset]; //DistDouble * alphas = new DistDouble[curoffset]; //cout<<current.to_vec().transpose() << endl; if (curoffset > 0){ for (int j=0; j < mi.ortho_repeats; j++){ memset(alphas, 0, sizeof(double)*curoffset); #pragma omp parallel for for (int i=mat.start_offset; i< current.offset; i++){ for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ assert(i-mat.start_offset>=0 && i-mat.start_offset < curoffset); assert(i < latent_factors_inmem[k].pvec.size()); assert(k < (int)latent_factors_inmem.size()); assert(current.offset < latent_factors_inmem[k].pvec.size()); alphas[i-mat.start_offset] += latent_factors_inmem[k].pvec[i] * latent_factors_inmem[k].pvec[current.offset]; } } for (int i=mat.start_offset; i< current.offset; i++){ #pragma omp parallel for for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ latent_factors_inmem[k].pvec[current.offset] -= alphas[i-mat.start_offset] * latent_factors_inmem[k].pvec[i]; } } } //for ortho_repeast } delete [] alphas; debug = old_debug; current.debug_print(current.name); // alpha = 0; double sum = 0; int k; //#pragma omp parallel for private(k) reduction(+: sum) for (k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ sum = sum + pow(latent_factors_inmem[k].pvec[current.offset],2); } alpha = sqrt(sum); if (alpha >= 1e-10 ){ #pragma omp parallel for for (int k=info.get_start_node(!current.transpose); k< info.get_end_node(!current.transpose); k++){ latent_factors_inmem[k].pvec[current.offset]/=alpha; } } }
/**
 * Copy a dense vector into this distributed vector's slot.
 *
 * pvec must have as many entries as one of the two sides of the graph
 * (asserted). The transpose flag is set when the graph is not square and
 * the length matches info.num_nodes(false), and is cleared otherwise.
 * The copy is delegated to the graph: this vector and pvec are published
 * through the globals `pcurrent` and `curvec`, and assign_vec is applied to
 * the vertex set returned by pgraph->select(select_in_range). The work is
 * bracketed by the `vecequals` tracepoint.
 */
DistVec& DistVec::operator=(const vec & pvec){
  assert(offset >= 0);
  assert(pvec.size() == info.num_nodes(true) || pvec.size() == info.num_nodes(false));
  assert(start < end);

  transpose = !info.is_square() && pvec.size() == info.num_nodes(false);

  INITIALIZE_TRACER(vecequals, "vector assignment");
  BEGIN_TRACEPOINT(vecequals);

  // Globals read by the per-vertex callback.
  pcurrent = this;
  curvec = pvec;
  graphlab::vertex_set nodes = pgraph->select(select_in_range);
  pgraph->transform_vertices(assign_vec, nodes);

  END_TRACEPOINT(vecequals);
  debug_print(name);
  return *this;
}
/* Use the total rank of adjacent pages to update this page */ void apply(icontext_type& context, vertex_type& vertex, const double& total) { //printf("Entered apply on node %d value %lg\n", vertex.id(), total); vertex_data & user = vertex.data(); assert(mi.x_offset >=0 || mi.y_offset >= 0); assert(mi.r_offset >=0); /* perform orthogonalization of current vector */ if (mi.orthogonalization){ for (int i=mi.mat_offset; i< mi.vec_offset; i++){ vertex.data().pvec[mi.vec_offset] -= alphas.pvec[i-mi.mat_offset] * vertex.data().pvec[i]; } return; } double val = total; //assert(total != 0 || mi.y_offset >= 0); //store previous value for convergence detection if (mi.prev_offset >= 0) user.pvec[mi.prev_offset ] = user.pvec[mi.r_offset]; assert(mi.x_offset >=0 || mi.y_offset>=0); if (mi.A_offset && mi.x_offset >= 0){ if (info.is_square() && mi.use_diag)// add the diagonal term val += (/*mi.c**/ (user.A_ii+ regularization) * user.pvec[mi.x_offset]); //printf("node %d added diag term: %lg\n", vertex.id(), user.A_ii); val *= mi.c; } /***** COMPUTE r = c*I*x *****/ else if (!mi.A_offset && mi.x_offset >= 0){ val = mi.c*user.pvec[mi.x_offset]; } /**** COMPUTE r+= d*y (optional) ***/ if (mi.y_offset>= 0){ val += mi.d*user.pvec[mi.y_offset]; } /***** compute r = (... ) / div */ if (mi.div_offset >= 0){ val /= user.pvec[mi.div_offset]; } user.pvec[mi.r_offset] = val; //printf("Exit apply on node %d value %lg\n", vertex.id(), val); }
/**
 * Assign the value of a vector expression to this vector.
 *
 * If no x or y operand has been recorded in `mi` (both offsets are -1),
 * `vec` itself becomes the y operand. A zero coefficient d is replaced by
 * 1.0. This vector takes over vec's transpose flag and node range, `vec` is
 * published through `pcurrent`, and start_engine() performs the update.
 * The engine run is bracketed by the Axbtrace2 tracepoint. Afterwards the
 * offsets in `mi` are reset.
 */
DistVec& DistVec::operator=(const DistVec & vec){
  assert(offset < (info.is_square() ? 2*data_size: data_size));
  if (mi.x_offset == -1 && mi.y_offset == -1){
    mi.y_offset = vec.offset;
  }
  mi.r_offset = offset;
  assert(prev_offset < data_size);
  mi.prev_offset = prev_offset;
  if (mi.d == 0.0)
    mi.d=1.0;

  // Mirror the source vector's orientation and range, here and in mi.
  transpose = vec.transpose;
  end = vec.end;
  start = vec.start;
  mi.start = start;
  mi.end = end;

  INITIALIZE_TRACER(Axbtrace2, "Update function Axb");
  BEGIN_TRACEPOINT(Axbtrace2);
  // pcurrent is a non-const pointer, so constness of vec is cast away here.
  pcurrent = const_cast<DistVec*>(&vec);
  start_engine();
  // Close the tracepoint opened above; it was previously left unterminated,
  // unlike the other assignment operators that trace their engine run.
  END_TRACEPOINT(Axbtrace2);

  debug_print(name);
  mi.reset_offsets();
  return *this;
}
/**
 * Assign the value of a vector expression to this vector.
 *
 * If no x or y operand has been recorded in `mi` (both offsets are -1),
 * `vec` itself becomes the y operand. A zero coefficient d is replaced by
 * 1.0. This vector takes over vec's transpose flag and node range. The
 * update is one call to pengine->run(program, 1) on the shared engine,
 * after which the offsets in `mi` are reset.
 */
DistVec& operator=(const DistVec & vec){
  assert(offset < (info.is_square() ? 2*data_size: data_size));

  const bool no_operand_recorded = (mi.x_offset == -1) && (mi.y_offset == -1);
  if (no_operand_recorded)
    mi.y_offset = vec.offset;

  mi.r_offset = offset;
  assert(prev_offset < data_size);
  mi.prev_offset = prev_offset;
  if (mi.d == 0.0)
    mi.d = 1.0;

  // Mirror the source vector's orientation and range, here and in mi.
  transpose = vec.transpose;
  end = vec.end;
  start = vec.start;
  mi.start = vec.start;
  mi.end = vec.end;

  pengine->run(program, 1);

  debug_print(name);
  mi.reset_offsets();
  return *this;
}
/**
 * Per-vertex callback: copy one entry of the global `curvec` into the
 * vertex's pvec at the slot of the vector published through `pcurrent`.
 *
 * The entry is chosen by the vertex id relative to pcurrent->start. For
 * non-square graphs that index is asserted to be inside curvec.
 */
void assign_vec(graph_type::vertex_type & vertex){
  if (!info.is_square()){
    // NOTE(review): if vertex.id() is unsigned, the ">= 0" half is always true - confirm.
    assert(vertex.id() - pcurrent->start >= 0 && vertex.id() - pcurrent->start < curvec.size());
  }
  vertex.data().pvec[pcurrent->offset] = curvec[vertex.id() - pcurrent->start];
}
/**
 * Derive the node range [start, end) from `info` for the current
 * orientation (the negated transpose flag is passed to the descriptor),
 * and assert that the range is non-empty and non-negative.
 */
void init(){
  const bool use_rows = !transpose;
  start = info.get_start_node(use_rows);
  end = info.get_end_node(use_rows);
  assert(start < end && start >= 0 && end >= 1);
}
/**
 * Vertex predicate: does this vertex belong to the vector published
 * through `pcurrent`?
 *
 * On square graphs every vertex qualifies. Otherwise the vertex id must lie
 * in the [start, end) node range that `info` reports for the orientation
 * of *pcurrent.
 */
bool selected_node(const graph_type::vertex_type& vertex){
  if (info.is_square())
    return true;

  const bool use_rows = !pcurrent->transpose;
  if (vertex.id() < (uint)info.get_start_node(use_rows))
    return false;
  return vertex.id() < (uint)info.get_end_node(use_rows);
}
/**
 * Restarted Lanczos bidiagonalization driver for computing singular values.
 *
 * Each outer iteration extends the U and V bases from the number of already
 * converged triplets (nconv) up to nv columns, builds the bidiagonal matrix
 * T from the alpha/beta coefficients, takes its SVD, estimates the error of
 * every candidate singular value, and accepts those whose estimate is below
 * `tol`. The loop ends when nsv values have converged or max_iter
 * iterations have been made. Afterwards the residual norms of the accepted
 * triplets are printed and, if save_vectors is set, the U and V vectors are
 * written to files named "<training>.U.<i>" and "<training>.V.<i>".
 *
 * @param info     graph descriptor of the matrix
 * @param mytimer  timer used only for progress messages
 * @param errest   output: reset to zeros(nv), then filled with error estimates
 * @param vecfile  when empty, the start vector v_0 is initialised randomly
 * @return `sigma`, created as zeros(data_size) and filled with singular values
 */
vec lanczos( bipartite_graph_descriptor & info, timer & mytimer, vec & errest, const std::string & vecfile){

  int nconv = 0;
  int its = 1;
  DistMat A(info);
  DistSlicedMat U(info.is_square() ? data_size : 0, info.is_square() ? 2*data_size : data_size, true, info, "U");
  DistSlicedMat V(0, data_size, false, info, "V");
  vec alpha, beta, b;
  vec sigma = zeros(data_size);
  errest = zeros(nv);

  // Normalized start vector, random unless a vector file was given.
  DistVec v_0(info, 0, false, "v_0");
  if (vecfile.size() == 0)
    v_0 = randu(size(A,2));
  PRINT_VEC2("svd->V", v_0);

  DistDouble vnorm = norm(v_0);
  v_0=v_0/vnorm;
  PRINT_INT(nv);

  while(nconv < nsv && its < max_iter){
    std::cout<<"Starting iteration: " << its << " at time: " << mytimer.current_time() << std::endl;
    int k = nconv;
    int n = nv;
    PRINT_INT(k);
    PRINT_INT(n);

    alpha = zeros(n);
    beta = zeros(n);

    // orthogonalize_vs_all also normalizes the column and returns its norm.
    U[k] = V[k]*A._transpose();
    orthogonalize_vs_all(U, k, alpha(0));
    PRINT_VEC3("alpha", alpha, 0);

    for (int i=k+1; i<n; i++){
      std::cout <<"Starting step: " << i << " at time: " << mytimer.current_time() << std::endl;
      PRINT_INT(i);

      V[i]=U[i-1]*A;
      orthogonalize_vs_all(V, i, beta(i-k-1));
      PRINT_VEC3("beta", beta, i-k-1);

      U[i] = V[i]*A._transpose();
      orthogonalize_vs_all(U, i, alpha(i-k));
      PRINT_VEC3("alpha", alpha, i-k);
    }

    V[n]= U[n-1]*A;
    orthogonalize_vs_all(V, n, beta(n-k-1));
    PRINT_VEC3("beta", beta, n-k-1);

    // Compute the SVD of the bidiagonal matrix T (alpha on the diagonal,
    // beta on the superdiagonal).
    PRINT_INT(nv);
    PRINT_NAMED_INT("svd->nconv", nconv);
    n = nv - nconv;
    PRINT_INT(n);
    alpha.conservativeResize(n);
    beta.conservativeResize(n);

    PRINT_MAT2("Q",eye(n));
    PRINT_MAT2("PT",eye(n));
    PRINT_VEC2("alpha",alpha);
    PRINT_VEC2("beta",beta);

    mat T=diag(alpha);
    for (int i=0; i<n-1; i++)
      set_val(T, i, i+1, beta(i));
    PRINT_MAT2("T", T);
    mat a,PT;
    svd(T, a, PT, b);
    PRINT_MAT2("Q", a);
    alpha=b.transpose();
    PRINT_MAT2("alpha", alpha);
    // Only the last beta entry is kept; it is needed for the error estimate.
    for (int t=0; t< n-1; t++)
      beta(t) = 0;
    PRINT_VEC2("beta",beta);
    PRINT_MAT2("PT", PT.transpose());

    // Estimate the error of every candidate singular value.
    int kk = 0;
    for (int i=nconv; i < nv; i++){
      int j = i-nconv;
      PRINT_INT(j);
      sigma(i) = alpha(j);
      PRINT_NAMED_DBL("svd->sigma[i]", sigma(i));
      PRINT_NAMED_DBL("Q[j*n+n-1]",a(n-1,j));
      PRINT_NAMED_DBL("beta[n-1]",beta(n-1));
      // fabs rather than an unqualified abs, so the value can never be routed
      // through the integer overload and truncated.
      errest(i) = fabs(a(n-1,j)*beta(n-1));
      PRINT_NAMED_DBL("svd->errest[i]", errest(i));
      // Use the relative error when the singular value is large enough.
      if (alpha(j) > tol){
        errest(i) = errest(i) / alpha(j);
        PRINT_NAMED_DBL("svd->errest[i]", errest(i));
      }
      if (errest(i) < tol){
        kk = kk+1;
        PRINT_NAMED_INT("k",kk);
      }

      if (nconv +kk >= nsv){
        printf("set status to tol\n");
        finished = true;
      }
    } // end for
    PRINT_NAMED_INT("k",kk);

    // Build the restart vector from column kk of PT, unless we are done.
    vec v;
    if (!finished){
      vec swork=get_col(PT,kk);
      PRINT_MAT2("swork", swork);
      v = zeros(size(A,1));
      for (int ttt=nconv; ttt < nconv+n; ttt++){
        v = v+swork(ttt-nconv)*(V[ttt].to_vec());
      }
      PRINT_VEC2("svd->V",V[nconv]);
      PRINT_VEC2("v[0]",v);
    }

    // Compute the Ritz vectors of the converged singular triplets.
    if (kk > 0){
      PRINT_VEC2("svd->V", V[nconv]);
      mat tmp= V.get_cols(nconv,nconv+n)*PT;
      V.set_cols(nconv, nconv+kk, get_cols(tmp, 0, kk));
      PRINT_VEC2("svd->V", V[nconv]);
      PRINT_VEC2("svd->U", U[nconv]);
      tmp= U.get_cols(nconv, nconv+n)*a;
      U.set_cols(nconv, nconv+kk,get_cols(tmp,0,kk));
      PRINT_VEC2("svd->U", U[nconv]);
    }

    nconv=nconv+kk;
    if (finished)
      break;

    V[nconv]=v;
    PRINT_VEC2("svd->V", V[nconv]);
    PRINT_NAMED_INT("svd->nconv", nconv);

    its++;
    PRINT_NAMED_INT("svd->its", its);
    PRINT_NAMED_INT("svd->nconv", nconv);
    PRINT_NAMED_INT("nv",nv);

  } // end(while)

  printf(" Number of computed signular values %d",nconv);
  printf("\n");

  // Report the residuals ||A'v - sigma*u|| and ||A u - sigma*v|| of every
  // accepted triplet, relative to sigma when sigma exceeds tol.
  DistVec normret(info, nconv, false, "normret");
  DistVec normret_tranpose(info, nconv, true, "normret_tranpose");
  for (int i=0; i < nconv; i++){
    normret = V[i]*A._transpose() -U[i]*sigma(i);
    double n1 = norm(normret).toDouble();
    PRINT_DBL(n1);
    normret_tranpose = U[i]*A -V[i]*sigma(i);
    double n2 = norm(normret_tranpose).toDouble();
    PRINT_DBL(n2);
    double err=sqrt(n1*n1+n2*n2);
    PRINT_DBL(err);
    PRINT_DBL(tol);
    if (sigma(i)>tol){
      err = err/sigma(i);
    }
    PRINT_DBL(err);
    PRINT_DBL(sigma(i));
    printf("Singular value %d \t%13.6g\tError estimate: %13.6g\n", i, sigma(i),err);
  }

  if (save_vectors){
    std::cout<<"Going to save output vectors U and V" << std::endl;
    if (nconv == 0)
      logstream(LOG_FATAL)<<"No converged vectors. Aborting the save operation" << std::endl;
    char output_filename[256];
    for (int i=0; i< nconv; i++){
      // snprintf bounds the write: `training` is a caller-supplied path and
      // may be longer than the fixed-size buffer.
      snprintf(output_filename, sizeof(output_filename), "%s.U.%d", training.c_str(), i);
      write_output_vector(output_filename, U[i].to_vec(), false, "GraphLab v2 SVD output. This file contains eigenvector number i of the matrix U");
      snprintf(output_filename, sizeof(output_filename), "%s.V.%d", training.c_str(), i);
      write_output_vector(output_filename, V[i].to_vec(), false, "GraphLab v2 SVD output. This file contains eigenvector number i of the matrix V'");
    }
  }
  return sigma;
}