// Handles batch insertion of edges; all edges belong to the same graph.
// When an edge is added successfully, its attribute index must also be added;
// edge attribute indexes are maintained per graph, not per subgraph.
void handler_add_edges(Replier &rep) {
    string graph_name = rep.get_graph_name();
    list<Edge_u> &edges = rep.get_edges();
    list<Edge_u>::iterator it = edges.begin();
    Subgraph *sub;
    uint32_t num = 0;
    while (it != edges.end()) {
        // Fetch the subgraph holding the source vertex; it is created if absent.
        sub = graph_set->get_subgraph(graph_name, (*it).s_id);
        Edge e(*it);
        int res = sub->add_edge((*it).s_id, e);
        if (res == 0 || res == 1 || res == -1) {
            if (res == 0) {
                // Add the edge-attribute index for the whole graph.
                Graph *graph = graph_set->get_graph(graph_name);
                Key k((*it).blog_id);
                Value v((*it).s_id, (*it).d_id);
                //graph->add_edge_index(k,v);
                graph->edge_num_increment();
                num++; // Successful insert: count it.
            }
            edges.erase(it);
            it = edges.begin();
        } else {
            // Not handled yet: advance and wrap around so the remaining
            // edges are retried round-robin.
            it++;
            if (it == edges.end()) {
                it = edges.begin();
            }
        }
    }
    ostringstream stream_num;
    stream_num << num;
    string string_num = stream_num.str();
    rep.ans(STATUS_OK, string_num.c_str(), string_num.size() + 1);
}
// Handles batch insertion of vertices; all vertices belong to the same graph.
void handler_add_vertexes(Replier &rep) {
    string graph_name = rep.get_graph_name();
    list<Vertex_u> &vertexes = rep.get_vertexes();
    list<Vertex_u>::iterator it = vertexes.begin();
    Subgraph *sub;
    uint32_t num = 0;
    while (it != vertexes.end()) {
        // Fetch the subgraph holding this vertex; it is created if absent.
        sub = graph_set->get_subgraph(graph_name, (*it).id);
        Vertex v(*it);
        int res = sub->add_vertex(v);
        if (res == 0 || res == 1) {
            if (res == 0) {
                graph_set->get_graph(graph_name)->vertex_num_increment();
                num++; // Successful insert: count it.
            }
            vertexes.erase(it);
            it = vertexes.begin();
        } else {
            // Not handled yet: advance and wrap around to retry later.
            it++;
            if (it == vertexes.end()) {
                it = vertexes.begin();
            }
        }
    }
    ostringstream stream_num;
    stream_num << num;
    string string_num = stream_num.str();
    rep.ans(STATUS_OK, string_num.c_str(), string_num.size() + 1);
}
// Handles batch reads of edges; all edges belong to the same graph.
void handler_read_two_edges(Replier &rep) {
    string graph_name = rep.get_graph_name();
    list<Two_vertex> &vertexes = rep.get_two_vertexes();
    list<Two_vertex>::iterator it = vertexes.begin();
    Subgraph *sub;
    list<Edge_u> edges;
    while (it != vertexes.end()) {
        int res;
        // Fetch the subgraph holding the source vertex; it is created if absent.
        sub = graph_set->get_subgraph(graph_name, (*it).s_id);
        //sub->read_edges((*it).s_id,(*it).d_id,edges);
        res = sub->read_all_edges((*it).s_id, edges);
        if (res == 1 || res == 0) {
            vertexes.erase(it);
            it = vertexes.begin();
        } else {
            // Not readable yet (res == 2): advance and wrap around to
            // retry later.
            it++;
            if (it == vertexes.end()) {
                it = vertexes.begin();
            }
        }
    }
    rep.ans(STATUS_OK, edges);
}
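// The three handlers above share one control flow: walk a std::list, erase
// entries whose operation has completed, and wrap around to retry the rest.
// Below is a minimal sketch of that pattern as a generic helper; the names
// process_round_robin and Handler are illustrative and not part of this
// codebase. The handler returns true when an item is done and may be erased.
// Note the same caveat as the handlers: if some item never becomes ready,
// the loop never terminates.
#include <list>

template <typename T, typename Handler>
void process_round_robin(std::list<T> &items, Handler handle) {
    typename std::list<T>::iterator it = items.begin();
    while (it != items.end()) {
        if (handle(*it)) {
            items.erase(it);      // done: drop the item and restart the scan
            it = items.begin();
        } else {
            ++it;                 // not ready: move on, wrapping around
            if (it == items.end()) {
                it = items.begin();
            }
        }
    }
}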
void EnableSearchOnMatchNeighborhood(const MatchSet* m, const Subgraph& g,
                                     int sj_tree_node) {
  for (Subgraph::VertexIterator v_it = g.BeginVertexSet();
       v_it != g.EndVertexSet(); v_it++) {
    uint64_t query_node = *v_it;
    uint64_t data_graph_node = m->GetMatch(query_node);
    EnableSearchOnNode(data_graph_node, sj_tree_node);
  }
  return;
}
ComposedRule::ComposedRule(const ComposedRule &other, const Subgraph &rule,
                           int depth)
  : m_baseRule(other.m_baseRule)
  , m_attachedRules(other.m_attachedRules)
  , m_openAttachmentPoints(other.m_openAttachmentPoints)
  , m_depth(depth)
  , m_size(other.m_size+rule.GetSize())
  , m_nodeCount(other.m_nodeCount+rule.GetNodeCount()-1)
{
  m_attachedRules.push_back(&rule);
  m_openAttachmentPoints.pop();
}
double Fw(vector<double> &drv) {
  // Regularization term ||w||^2; its gradient contribution is 2w.
  double norm = dot_product(wvec, wvec);
  for (unsigned w = 0; w < drv.size(); w++)
    drv[w] += 2*wvec[w];

  double sum = 0.0;
  for (unsigned s = 0; s < subgraphs.size(); s++) {
    Subgraph *sg = subgraphs[s];
    Params *params = parameters[s];
    PowerMethod *power = powers[s];

    // Recompute edge scores, pageranks, and their derivatives under the
    // current parameter vector.
    params->set_wvec(wvec);
    sg->recompute_scores(*params);
    params->recalculate_derivs();
    power->pers_pagerank();
    power->derivatives();

    double sum_p = 0.0;
    vector<double> sum_d(wvec.size(), 0.0);
    candidate_sums(s, sum_p, sum_d);

    // Pairwise ranking loss: penalize every negative example whose
    // normalized pagerank approaches or exceeds a positive example's.
    double loss = 0.0;
    for (unsigned d = 0; d < sg->positive.size(); d++) {
      unsigned pos = sg->positive[d];
      double p_d = power->get_pagerank(pos);
      for (unsigned l = 0; l < sg->negative.size(); l++) {
        unsigned neg = sg->negative[l];
        double p_l = power->get_pagerank(neg);
        double diff = p_l/sum_p - p_d/sum_p;
        loss += hloss(diff);
        // Quotient rule for the derivative of each normalized pagerank.
        for (unsigned w = 0; w < wvec.size(); w++)
          drv[w] += dhloss(diff) *
            ( (power->get_derivative(neg, w)*sum_p - p_l*sum_d[w])/(sum_p*sum_p)
            - (power->get_derivative(pos, w)*sum_p - p_d*sum_d[w])/(sum_p*sum_p) );
      }
    }
    sum += loss;
  }
  return norm + sum;
}
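// Fw treats hloss/dhloss as given; they are not defined in this section.
// A minimal sketch, assuming a logistic loss with width b, a common choice
// for this kind of pairwise ranking objective; the constant b and these
// definitions are assumptions, not the original code. Requires <cmath>.
#include <cmath>

static const double b = 0.1; // assumed loss width; steeper as b -> 0

// h(x) = 1 / (1 + exp(-x/b)): near 0 when the pair is ordered correctly
// (x < 0), near 1 when a negative example outranks a positive one (x > 0).
double hloss(double x) {
    return 1.0 / (1.0 + exp(-x / b));
}

// h'(x) = h(x) * (1 - h(x)) / b
double dhloss(double x) {
    double h = 1.0 / (1.0 + exp(-x / b));
    return h * (1.0 - h) / b;
}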
ComposedRule::ComposedRule(const Subgraph &baseRule)
  : m_baseRule(baseRule)
  , m_depth(baseRule.GetDepth())
  , m_size(baseRule.GetSize())
  , m_nodeCount(baseRule.GetNodeCount())
{
  const std::set<const Node *> &leaves = baseRule.GetLeaves();
  for (std::set<const Node *>::const_iterator p = leaves.begin();
       p != leaves.end(); ++p) {
    if ((*p)->GetType() == TREE) {
      m_openAttachmentPoints.push(*p);
    }
  }
}
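// Together the two ComposedRule constructors support iterative composition:
// the one above seeds a ComposedRule from a base fragment and queues its
// TREE leaves as open attachment points; the copy-and-extend constructor
// clones a partial composition and consumes one attachment point. A minimal
// usage sketch; the fragment variables are hypothetical and the depth value
// is whatever the caller tracks for the composed rule:
void SketchCompose(const Subgraph &baseFragment,
                   const Subgraph &attachedFragment) {
  ComposedRule base(baseFragment);  // open points = base's TREE leaves
  // Only valid while open attachment points remain, since the extending
  // constructor pops one unconditionally.
  ComposedRule extended(base, attachedFragment, /*depth=*/1);
  // extended carries base's remaining attachment points minus the one
  // consumed, with its size and node count updated accordingly.
}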
TEST (GraphCommon, CreateSubgraphsFromConnectedComponentsInNonRootSubgraph)
{
  Subgraph graph (5);
  boost::add_edge (1, 2, graph);
  boost::add_edge (3, 4, graph);
  Subgraph s1 = graph.create_subgraph ();
  boost::add_vertex (1, s1);
  boost::add_vertex (2, s1);
  boost::add_vertex (3, s1);
  boost::add_vertex (4, s1);
  std::vector<SubgraphRef> subgraphs;
  pcl::graph::createSubgraphsFromConnectedComponents (s1, subgraphs);
  ASSERT_EQ (2, subgraphs.size ());
  EXPECT_EQ (1, subgraphs[0].get ().local_to_global (0));
  EXPECT_EQ (3, subgraphs[1].get ().local_to_global (0));
}
void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g)
{
  Write(rule,false);
  m_fwd << " Tree ";
  g.PrintTree(m_fwd);
  m_fwd << std::endl;
  m_inv << std::endl;
}
void derivatives() {
  Graph *graph = sub->subgraph;
  for (unsigned k = 0; k < pnum; k++) {
    unsigned iter = 0;
    bool stop = false;
    // We are done when maxiteration is reached
    // or the error is small enough.
    while (iter < maxiter && !stop) {
      iter++;
      // copy last iteration
      #pragma omp parallel for
      for (unsigned i = 0; i < nvert; i++) {
        dlast[i][k] = deriv[i][k];
        deriv[i][k] = 0.0;
      }
      #pragma omp parallel for
      for (unsigned id = 0; id < nvert; id++) {
        for (Graph::iterator e = graph->iterate_outgoing_edges(id); !e.end(); e++) {
          user_id fr = (*e).v2;
          double calc = sub->score(id, fr) * dlast[id][k]
                      + pagerank[id] * (1.0-alpha) * params->qderiv(id, fr, k);
          #pragma omp atomic
          deriv[fr][k] += calc;
        }
      }
      stop = true;
      for (unsigned d = 0; d < sub->positive.size(); d++) {
        unsigned pos = sub->positive[d];
        if (fabs(deriv[pos][k] - dlast[pos][k]) > tolerance)
          stop = false;
      }
      for (unsigned l = 0; l < sub->negative.size(); l++) {
        unsigned neg = sub->negative[l];
        if (fabs(deriv[neg][k] - dlast[neg][k]) > tolerance)
          stop = false;
      }
    }
    cout << "Derivative iterations: " << iter << endl;
  }
}
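// For reference, the edge loop in derivatives() is the product rule applied
// to the PageRank propagation step: each edge (i -> f) contributes
//
//   score(i,f) * dp_i/dw_k  +  p_i * (1-alpha) * qderiv(i,f,k)
//
// to dp_f/dw_k, i.e. transition weight times upstream derivative, plus
// upstream probability times the derivative of the transition weight. This
// is iterated to a fixed point, with convergence checked only on the
// positive and negative example vertices that actually enter the loss.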
void ScfgRuleWriter::Write(const ScfgRule &rule, const Subgraph &g,
                           size_t lineNum, bool printEndl)
{
  Write(rule,lineNum,false);
  m_fwd << " {{Tree ";
  g.PrintTree(m_fwd);
  m_fwd << "}}";
  if (printEndl) {
    m_fwd << std::endl;
    m_inv << std::endl;
  }
}
void AlignmentGraph::ExtractMinimalRules(const Options &options)
{
  // Determine which nodes are frontier nodes.
  std::set<Node *> frontierSet;
  ComputeFrontierSet(m_root, options, frontierSet);

  // Form the minimal frontier graph fragment rooted at each frontier node.
  std::vector<Subgraph> fragments;
  fragments.reserve(frontierSet.size());
  for (std::set<Node *>::iterator p(frontierSet.begin());
       p != frontierSet.end(); ++p) {
    Node *root = *p;
    Subgraph fragment = ComputeMinimalFrontierGraphFragment(root, frontierSet);
    assert(!fragment.IsTrivial());
    // Can it form an SCFG rule?
    // FIXME Does this exclude non-lexical unary rules?
    if (root->GetType() == TREE && !root->GetSpan().empty()) {
      root->AddRule(new Subgraph(fragment));
    }
  }
}
void run(Graph *graph, Users_data *profile, vector<user_id>& users,
         const char* outfile) {
  for (unsigned u = 0; u < users.size(); u++) {
    user_id id = users[u];
    Subgraph* sub = new Subgraph(graph, id, TIMEPOINT);
    Params* prm = new Params(wvec, sub->subgraph, sub->mutual);
    sub->recompute_scores(*prm);
    PowerMethod* power = new PowerMethod(sub, prm, wvec.size());
    power->pers_pagerank();
    evaluate(power);
    // output after every 5000 users
    if (u % 5000 == 0)
      report(outfile);
    delete power;
    delete prm;
    delete sub;
  }
  report(outfile);
}
StsgRule::StsgRule(const Subgraph &fragment)
  : m_targetSide(fragment, true)
{
  // Source side
  const std::set<const Node *> &sinkNodes = fragment.GetLeaves();

  // Collect the subset of sink nodes that excludes target nodes with
  // empty spans.
  std::vector<const Node *> productiveSinks;
  productiveSinks.reserve(sinkNodes.size());
  for (std::set<const Node *>::const_iterator p = sinkNodes.begin();
       p != sinkNodes.end(); ++p) {
    const Node *sink = *p;
    if (!sink->GetSpan().empty()) {
      productiveSinks.push_back(sink);
    }
  }

  // Sort them into the order defined by their spans.
  std::sort(productiveSinks.begin(), productiveSinks.end(),
            PartitionOrderComp);

  // Build a map from target nodes to source-order indices, so that we
  // can construct the Alignment object later.
  std::map<const Node *, std::vector<int> > sinkToSourceIndices;
  std::map<const Node *, int> nonTermSinkToSourceIndex;

  m_sourceSide.reserve(productiveSinks.size());
  int srcIndex = 0;
  int nonTermCount = 0;
  for (std::vector<const Node *>::const_iterator p = productiveSinks.begin();
       p != productiveSinks.end(); ++p, ++srcIndex) {
    const Node &sink = **p;
    if (sink.GetType() == TREE) {
      m_sourceSide.push_back(Symbol("X", NonTerminal));
      sinkToSourceIndices[&sink].push_back(srcIndex);
      nonTermSinkToSourceIndex[&sink] = nonTermCount++;
    } else {
      assert(sink.GetType() == SOURCE);
      m_sourceSide.push_back(Symbol(sink.GetLabel(), Terminal));
      // Add all aligned target words to the sinkToSourceIndices map
      const std::vector<Node *> &parents(sink.GetParents());
      for (std::vector<Node *>::const_iterator q = parents.begin();
           q != parents.end(); ++q) {
        if ((*q)->GetType() == TARGET) {
          sinkToSourceIndices[*q].push_back(srcIndex);
        }
      }
    }
  }

  // Alignment
  std::vector<const Node *> targetLeaves;
  m_targetSide.GetTargetLeaves(targetLeaves);

  m_alignment.reserve(targetLeaves.size());
  m_nonTermAlignment.resize(nonTermCount);

  for (int i = 0, j = 0; i < targetLeaves.size(); ++i) {
    const Node *leaf = targetLeaves[i];
    assert(leaf->GetType() != SOURCE);
    if (leaf->GetSpan().empty()) {
      continue;
    }
    std::map<const Node *, std::vector<int> >::iterator p =
        sinkToSourceIndices.find(leaf);
    assert(p != sinkToSourceIndices.end());
    std::vector<int> &sourceNodes = p->second;
    for (std::vector<int>::iterator r = sourceNodes.begin();
         r != sourceNodes.end(); ++r) {
      int srcIndex = *r;
      m_alignment.push_back(std::make_pair(srcIndex, i));
    }
    if (leaf->GetType() == TREE) {
      m_nonTermAlignment[nonTermSinkToSourceIndex[leaf]] = j++;
    }
  }
}
ScfgRule::ScfgRule(const Subgraph &fragment)
  : m_sourceLHS("X", NonTerminal)
  , m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
  , m_pcfgScore(fragment.GetPcfgScore())
{
  // Source RHS
  const std::set<const Node *> &leaves = fragment.GetLeaves();

  std::vector<const Node *> sourceRHSNodes;
  sourceRHSNodes.reserve(leaves.size());
  for (std::set<const Node *>::const_iterator p(leaves.begin());
       p != leaves.end(); ++p) {
    const Node &leaf = **p;
    if (!leaf.GetSpan().empty()) {
      sourceRHSNodes.push_back(&leaf);
    }
  }

  std::sort(sourceRHSNodes.begin(), sourceRHSNodes.end(), PartitionOrderComp);

  // Build a mapping from target nodes to source-order indices, so that we
  // can construct the Alignment object later.
  std::map<const Node *, std::vector<int> > sourceOrder;

  m_sourceRHS.reserve(sourceRHSNodes.size());
  int srcIndex = 0;
  for (std::vector<const Node *>::const_iterator p(sourceRHSNodes.begin());
       p != sourceRHSNodes.end(); ++p, ++srcIndex) {
    const Node &sinkNode = **p;
    if (sinkNode.GetType() == TREE) {
      m_sourceRHS.push_back(Symbol("X", NonTerminal));
      sourceOrder[&sinkNode].push_back(srcIndex);
    } else {
      assert(sinkNode.GetType() == SOURCE);
      m_sourceRHS.push_back(Symbol(sinkNode.GetLabel(), Terminal));
      // Add all aligned target words to the sourceOrder map
      const std::vector<Node *> &parents(sinkNode.GetParents());
      for (std::vector<Node *>::const_iterator q(parents.begin());
           q != parents.end(); ++q) {
        if ((*q)->GetType() == TARGET) {
          sourceOrder[*q].push_back(srcIndex);
        }
      }
    }
  }

  // Target RHS + alignment
  std::vector<const Node *> targetLeaves;
  fragment.GetTargetLeaves(targetLeaves);

  m_alignment.reserve(targetLeaves.size());  // might be too much but that's OK
  m_targetRHS.reserve(targetLeaves.size());

  for (std::vector<const Node *>::const_iterator p(targetLeaves.begin());
       p != targetLeaves.end(); ++p) {
    const Node &leaf = **p;
    if (leaf.GetSpan().empty()) {
      // The node doesn't cover any source words, so we can only add
      // terminals to the target RHS (not a non-terminal).
      std::vector<std::string> targetWords(leaf.GetTargetWords());
      for (std::vector<std::string>::const_iterator q(targetWords.begin());
           q != targetWords.end(); ++q) {
        m_targetRHS.push_back(Symbol(*q, Terminal));
      }
    } else if (leaf.GetType() == SOURCE) {
      // Do nothing
    } else {
      SymbolType type = (leaf.GetType() == TREE) ? NonTerminal : Terminal;
      m_targetRHS.push_back(Symbol(leaf.GetLabel(), type));
      int tgtIndex = m_targetRHS.size()-1;
      std::map<const Node *, std::vector<int> >::iterator q(
          sourceOrder.find(&leaf));
      assert(q != sourceOrder.end());
      std::vector<int> &sourceNodes = q->second;
      for (std::vector<int>::iterator r(sourceNodes.begin());
           r != sourceNodes.end(); ++r) {
        int srcIndex = *r;
        m_alignment.push_back(std::make_pair(srcIndex, tgtIndex));
      }
    }
  }
}
void RetroSearch(const MatchSet* m, const Subgraph& join_subgraph,
                 int sj_tree_node) {
#ifdef TIMING
  struct timeval start, end;
  gettimeofday(&start, NULL);
#endif
  int next = 0;
  // cout << "   LOG Filling kv_scratchpad" << endl;
  // Extract the nodes from G_d where to extend the search.
  for (Subgraph::VertexIterator v_it = join_subgraph.BeginVertexSet();
       v_it != join_subgraph.EndVertexSet(); v_it++) {
    uint64_t query_node = *v_it;
    uint64_t data_graph_node = m->GetMatch(query_node);
    uint64_t degree = gSearch__->NeighborCount(data_graph_node);
    kv_scratchpad[next++] = pair<uint64_t, uint64_t>(degree, data_graph_node);
  }
  // assert(next > 0);
  if (next == 0) {
    return;
  }
  // cout << "   LOG sorting kv_scratchpad" << endl;
  sort(kv_scratchpad, kv_scratchpad + next);
  // cout << "   LOG Determining init_search" << endl;
  uint64_t init_search = kv_scratchpad[0].second;
  d_array<MatchSet*> match_list;
  // cout << "   LOG Executing search: " << sj_tree_node << endl;
  int leaf_index = -1;
  for (int i = 0; i < leaf_count__; i++) {
    // cout << "   LOG leaf node id: " << leaf_ids__[i] << endl;
    if (leaf_ids__[i] == sj_tree_node) {
      leaf_index = i;
      break;
    }
  }
  assert(leaf_index >= 0);
  leaf_queries__[leaf_index]->Execute(init_search, match_list);
  // cout << "   LOG Done executing search " << endl;
  for (int i = 0, N = match_list.len; i < N; i++) {
    // cout << "   LOG Transforming match results" << endl;
    TransformMatchSet(id_maps__[leaf_index], match_list[i]);
    bool match_found = true;
    for (Subgraph::VertexIterator v_it = join_subgraph.BeginVertexSet();
         v_it != join_subgraph.EndVertexSet(); v_it++) {
      uint64_t query_node = *v_it;
      uint64_t match1 = m->GetMatch(query_node);
      uint64_t match2 = match_list[i]->GetMatch(query_node);
      if (match1 != match2) {
        match_found = false;
        break;
      }
    }
    if (match_found) {
      // cout << "   LOG ProcessMatch" << endl;
      ProcessMatch(match_list[i], sj_tree_node);
    }
  }
#ifdef TIMING
  gettimeofday(&end, NULL);
  retro_search_time__ += get_tv_diff(start, end);
#endif
  return;
}
// Personalized pagerank starting from vertex start (at index 0)
void pers_pagerank() {
  Graph *graph = sub->subgraph;
  unsigned iter = 0;
  double err = 1.0;

  // We are done when maxiteration is reached
  // or the error is small enough.
  while (iter++ < maxiter && err > tolerance) {
    // copy last iteration to last array
    // and clear pagerank array
    #pragma omp parallel for
    for (unsigned i = 0; i < nvert; i++) {
      last[i] = pagerank[i];
      pagerank[i] = 0.0;
    }

    // sum up the nodes without outgoing edges ("dangling nodes").
    // their pagerank sum will be uniformly distributed among all nodes.
    double zsum = 0.0;
    #pragma omp parallel for reduction(+:zsum)
    for (unsigned i = 0; i < sub->zerodeg.size(); i++)
      zsum += last[ sub->zerodeg[i] ];
    double nolinks = (1.0-alpha) * zsum / nvert;

    pagerank[0] += alpha; // add teleport probability to the start vertex

    #pragma omp parallel for
    for (unsigned id = 0; id < nvert; id++) {
      double update = (1.0-alpha) * last[id];
      for (Graph::iterator e = graph->iterate_outgoing_edges(id); !e.end(); e++) {
        #pragma omp atomic
        pagerank[(*e).v2] += (update * sub->score(id, (*e).v2));
      }
      #pragma omp atomic
      pagerank[id] += nolinks; // pagerank from "dangling nodes"
    }

    // sum the pagerank
    double sum = 0.0;
    #pragma omp parallel for reduction(+:sum)
    for (unsigned i = 0; i < nvert; i++)
      sum += pagerank[i];

    // normalize to valid probabilities, from 0 to 1.
    sum = 1.0 / sum;
    #pragma omp parallel for
    for (unsigned i = 0; i < nvert; i++)
      pagerank[i] *= sum;

    // sum up the error
    err = 0.0;
    #pragma omp parallel for reduction(+:err)
    for (unsigned i = 0; i < nvert; i++)
      err += fabs(pagerank[i] - last[i]);

    //cout << "Iteration " << iter << endl;
    //cout << "Error: " << err << endl;
  }
  //cout << "PageRank iterations: " << iter << endl;
}
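// Fw reads sum_p and sum_d from candidate_sums, which is not shown in this
// section. A minimal sketch of what it plausibly computes, assuming the
// candidate set is the union of the subgraph's positive and negative
// examples; that assumption and this loop body are illustrative, not the
// original code. It accumulates the total candidate pagerank and, per
// parameter, the total candidate derivative, used to normalize p and dp
// in the loss.
void candidate_sums(unsigned s, double &sum_p, vector<double> &sum_d) {
    Subgraph *sg = subgraphs[s];
    PowerMethod *power = powers[s];
    sum_p = 0.0;
    for (unsigned w = 0; w < sum_d.size(); w++)
        sum_d[w] = 0.0;
    // Positive examples.
    for (unsigned d = 0; d < sg->positive.size(); d++) {
        unsigned v = sg->positive[d];
        sum_p += power->get_pagerank(v);
        for (unsigned w = 0; w < sum_d.size(); w++)
            sum_d[w] += power->get_derivative(v, w);
    }
    // Negative examples.
    for (unsigned l = 0; l < sg->negative.size(); l++) {
        unsigned v = sg->negative[l];
        sum_p += power->get_pagerank(v);
        for (unsigned w = 0; w < sum_d.size(); w++)
            sum_d[w] += power->get_derivative(v, w);
    }
}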