/**
 * Reports the pattern currently stored in DFS_CODE (when it is minimal and
 * frequent) and then explores every one-edge extension of it.
 *
 * Extensions are first collected per embedding, then queued as DFS tuples in
 * dfs_task_queue[dfs_level], and finally popped one by one and explored
 * through a recursive call with dfs_level + 1. load_balance() is invoked
 * after every enqueue and after every dequeue.
 *
 * @param projected  embeddings of the current pattern
 * @param dfs_level  depth of this call; selects the task queue slot
 */
void graph_miner_mpi_dyn::project(Projected &projected, int dfs_level)
{
  // Only minimal DFS codes are processed.
  if(!is_min()) {
    return;
  }

  // Check if the pattern is frequent enough.
  unsigned int sup = support(projected);
  if(sup < minimal_support) {
    return;
  }

  DEBUG(*logger, "DFS level = " << dfs_level);
  DEBUG(*(graph_miner::logger), "executing project for code: " << DFS_CODE.to_string() << "; support: " << sup);

  // Output the frequent substructure.
  report(projected, sup);

  // An upper bound on the pattern size is deliberately not enforced here
  // (the corresponding maxpat_max check is disabled). Equality would not be
  // tested anyway, because the DFS exploration may still add edges inside an
  // existing subgraph without increasing the number of nodes.

  // The pattern is frequent, so its (n+1)-edge extensions might be frequent
  // as well; enumerate all of them.
  const RMPath &rm_path = DFS_CODE.buildRMPath();
  int min_label = DFS_CODE[0].fromlabel;
  int rightmost = DFS_CODE[rm_path[0]].to;

  Projected_map3 fwd_ext;
  Projected_map2 bck_ext;
  types::EdgeList found_edges;

  current_dfs_level = dfs_level;

  // Gather every possible one-edge extension, embedding by embedding.
  for(unsigned int emb = 0; emb < projected.size(); ++emb) {
    unsigned int id = projected[emb].id;
    PDFS *cur = &projected[emb];
    History history(graph, cur);

    // XXX: do we have to change something here for directed edges?
    // Backward extensions.
    int pos = (int)rm_path.size() - 1;
    while(pos >= 1) {
      Edge *back = get_backward(graph, history[rm_path[pos]], history[rm_path[0]], history);
      if(back) {
        bck_ext[DFS_CODE[rm_path[pos]].from][back->elabel].push(id, back, cur);
      }
      --pos;
    }

    // Pure forward extensions.
    // FIXME: a too large e->to (== history[rm_path[0]]->to) is passed into
    // get_forward_pure, such that the assertion fails. The problem is:
    // history[rm_path[0]]->to > graph.size()
    if(get_forward_pure(graph, history[rm_path[0]], min_label, history, found_edges)) {
      types::EdgeList::iterator it = found_edges.begin();
      for(; it != found_edges.end(); ++it) {
        fwd_ext[rightmost][(*it)->elabel][graph[(*it)->to].label].push(id, *it, cur);
      }
    }

    // Backtracked forward extensions.
    for(int k = 0; k < (int)rm_path.size(); ++k) {
      if(!get_forward_rmpath(graph, history[rm_path[k]], min_label, history, found_edges)) {
        continue;
      }
      types::EdgeList::iterator it = found_edges.begin();
      for(; it != found_edges.end(); ++it) {
        fwd_ext[DFS_CODE[rm_path[k]].from][(*it)->elabel][graph[(*it)->to].label].push(id, *it, cur);
      }
    }
  }

  // Make sure a queue exists for this level.
  if(dfs_task_queue.size() <= dfs_level) {
    dfs_task_queue.push_back(std::deque<types::DFS>());
  }

  // Queue all extended substructures: backward ones first ...
  for(Projected_iterator2 to = bck_ext.begin(); to != bck_ext.end(); ++to) {
    Projected_iterator1 elabel = to->second.begin();
    for(; elabel != to->second.end(); ++elabel) {
      dfs_task_queue[dfs_level].push_back(DFS(rightmost, to->first, -1, elabel->first, -1));
      load_balance();
    }
  }

  // ... then forward ones, visiting the source vertices in reverse key order.
  for(Projected_riterator3 from = fwd_ext.rbegin(); from != fwd_ext.rend(); ++from) {
    for(Projected_iterator2 elabel = from->second.begin(); elabel != from->second.end(); ++elabel) {
      Projected_iterator1 tolabel = elabel->second.begin();
      for(; tolabel != elabel->second.end(); ++tolabel) {
        dfs_task_queue[dfs_level].push_back(DFS(from->first, rightmost + 1, -1, elabel->first, tolabel->first));
        load_balance();
      }
    }
  }

  // Work through the queue of this level. The queue is re-indexed on every
  // access instead of being cached in a reference.
  while(!dfs_task_queue[dfs_level].empty()) {
    DFS dfs = dfs_task_queue[dfs_level].front();
    dfs_task_queue[dfs_level].pop_front();
    DEBUG(*logger, "popped dfs = " << dfs.to_string() );

    current_dfs_level = dfs_level;
    load_balance();

    DFS_CODE.push(dfs.from, dfs.to, dfs.fromlabel, dfs.elabel, dfs.tolabel);

    // Projected (PDFS vector): each entry contains graph id 0, edge pointer,
    // null PDFS.
    Projected &next = dfs.is_backward()
                      ? bck_ext[dfs.to][dfs.elabel]
                      : fwd_ext[dfs.from][dfs.elabel][dfs.tolabel];
    project(next, dfs_level + 1);

    DFS_CODE.pop();
  }
}
/**
 * Recursively grows the pattern currently held in DFS_CODE, following only
 * the best-ranked extensions at every step.
 *
 * The call returns early when
 *   - the support of the pattern is below minsup,
 *   - is_min() rejects the current DFS code,
 *   - the pattern graph has more than maxVertices vertices or maxEdges edges,
 *   - maxpat_max > maxpat_min and the pattern has more than maxpat_max nodes.
 *
 * Otherwise every one-edge extension along the rightmost path is collected
 * and handled in two passes, backward extensions first and forward
 * extensions second. Within a pass each candidate is scored (Frequency and
 * SET_COVER), the candidates are ordered with SetPairComp, and the first
 * BeamWidth of them are appended to BestList and explored recursively.
 *
 * @param projected  embeddings of the current pattern; the id of each entry
 *                   is used as an index into TRANS
 */
void gSpan::project(Projected& projected)
{
    UINT sup = support(projected);
    std::cout << "Support: " << sup << std::endl;
    if(sup < minsup) return;

    if(!is_min()) return;

    // NOTE(review): the size check builds the graph with Graph(false) while
    // the candidate graphs below use Graph(directed) - confirm this is
    // intended.
    Graph g(false);
    DFS_CODE.toGraph(g);
    if(g.size() > maxVertices || g.edge_size() > maxEdges) return;

    ID++;

    if(maxpat_max > maxpat_min && DFS_CODE.nodeCount() > maxpat_max) return;

    const RMPath& rmpath = DFS_CODE.buildRMPath();
    int minlabel = DFS_CODE[0].fromlabel;
    int maxtoc = DFS_CODE[rmpath[0]].to;

    Projected_map3 new_fwd_root;
    Projected_map2 new_bck_root;
    EdgeList edges;

    // Collect all one-edge extensions of every embedding.
    for(UINT n = 0; n < projected.size(); ++n) {
        UINT id = projected[n].id;
        PDFS* cur = &projected[n];
        History history(TRANS[id], cur);

        // Backward edges from the rightmost vertex.
        for(int i = (int)rmpath.size() - 1; i >= 1; --i) {
            Edge* e = get_backward(TRANS[id], history[rmpath[i]], history[rmpath[0]], history);
            if(e) new_bck_root[DFS_CODE[rmpath[i]].from][e->elabel].push(id, e, cur);
        }

        // Forward edges leaving the rightmost vertex.
        if(get_forward_pure(TRANS[id], history[rmpath[0]], minlabel, history, edges)) {
            for(EdgeList::iterator it = edges.begin(); it != edges.end(); ++it)
                new_fwd_root[maxtoc][(*it)->elabel][TRANS[id][(*it)->to].label].push(id, *it, cur);
        }

        // Forward edges leaving the other vertices of the rightmost path.
        for(int i = 0; i < (int)rmpath.size(); ++i) {
            if(get_forward_rmpath(TRANS[id], history[rmpath[i]], minlabel, history, edges)) {
                for(EdgeList::iterator it = edges.begin(); it != edges.end(); ++it)
                    new_fwd_root[DFS_CODE[rmpath[i]].from][(*it)->elabel][TRANS[id][(*it)->to].label].push(id, *it, cur);
            }
        }
    }

    // Candidate bookkeeping. The five DFS-code fields of candidate k are kept
    // as plain ints at ext_codes[k * kCodeLen .. k * kCodeLen + 4]; this avoids
    // formatting them into a fixed-size text buffer and parsing them back.
    // The embeddings are referenced in place inside the two maps (which stay
    // alive and unmodified for the rest of this call), so no embedding list
    // is copied.
    const int kCodeLen = 5;
    std::vector<int> ext_codes;
    std::vector<Projected*> ext_embeddings;
    std::vector<std::pair<Graph, int> > sorted;

    // pass 0: backward extensions, pass 1: forward extensions.
    for(int pass = 0; pass < 2; ++pass) {
        ext_codes.clear();
        ext_embeddings.clear();
        sorted.clear();

        if(pass == 0) {
            for(Projected_iterator2 to = new_bck_root.begin(); to != new_bck_root.end(); ++to) {
                for(Projected_iterator1 elabel = to->second.begin(); elabel != to->second.end(); ++elabel) {
                    ext_codes.push_back(maxtoc);
                    ext_codes.push_back(to->first);
                    ext_codes.push_back(-1);
                    ext_codes.push_back(elabel->first);
                    ext_codes.push_back(-1);
                    ext_embeddings.push_back(&elabel->second);
                }
            }
        } else {
            // Source vertices are visited in reverse key order.
            for(Projected_riterator3 from = new_fwd_root.rbegin(); from != new_fwd_root.rend(); ++from) {
                for(Projected_iterator2 elabel = from->second.begin(); elabel != from->second.end(); ++elabel) {
                    for(Projected_iterator1 tolabel = elabel->second.begin(); tolabel != elabel->second.end(); ++tolabel) {
                        ext_codes.push_back(from->first);
                        ext_codes.push_back(maxtoc + 1);
                        ext_codes.push_back(-1);
                        ext_codes.push_back(elabel->first);
                        ext_codes.push_back(tolabel->first);
                        ext_embeddings.push_back(&tolabel->second);
                    }
                }
            }
        }

        // Score every candidate: temporarily extend DFS_CODE, materialise the
        // pattern graph, and record its frequency and coverage ratio.
        const int candidate_count = static_cast<int>(ext_embeddings.size());
        for(int k = 0; k < candidate_count; ++k) {
            const int* code = &ext_codes[k * kCodeLen];
            DFS_CODE.push(code[0], code[1], code[2], code[3], code[4]);

            Graph candidate(directed);
            DFS_CODE.toGraph(candidate);
            candidate.Frequency = support(*ext_embeddings[k]);
            candidate.SET_COVER = static_cast<double>(candidate.Frequency) / static_cast<double>(TRANS.size());
            sorted.push_back(std::make_pair(candidate, k));

            DFS_CODE.pop();
        }

        std::sort(sorted.begin(), sorted.end(), SetPairComp);

        // Follow only the first BeamWidth candidates. The bound is computed
        // as a signed value, so a non-positive BeamWidth selects nothing
        // instead of wrapping around to a huge unsigned limit.
        const int beam = std::min(candidate_count, BeamWidth);
        for(int r = 0; r < beam; ++r) {
            const int pick = sorted[r].second;
            const int* code = &ext_codes[pick * kCodeLen];

            DFS_CODE.push(code[0], code[1], code[2], code[3], code[4]);
            BestList.push_back(sorted[r].first);
            project(*ext_embeddings[pick]);
            DFS_CODE.pop();
        }
    }
}
/**
 * Rebuilds the embeddings for the DFS tuples queued at dfs_level.
 *
 * Neither frequency nor minimality is checked here. Each queued tuple is
 * appended to DFS_CODE, the embeddings in `projected` are extended by the
 * matching edge, and the result is handed to the next level: another
 * regenerate_embeddings() call while embeddings_regeneration_level is
 * greater than dfs_level, a regular project() call otherwise.
 *
 * @param projected  embeddings of the pattern that precedes the queued tuples
 * @param dfs_level  index of the task queue to drain
 */
void graph_miner_mpi_dyn::regenerate_embeddings(Projected &projected, int dfs_level)
{
  DEBUG(*(graph_miner::logger), "DFS level inside regenerate embeddings = " << dfs_level << " queue size = " << dfs_task_queue[dfs_level].size());

  // current_dfs_level is not advanced to dfs_level + 1 here; according to the
  // original note, no task split is done while regenerating embeddings.

  // Drain the queue of this level. The queue is looked up by index on every
  // access rather than through a cached reference.
  while(!dfs_task_queue[dfs_level].empty()) {
    types::DFS dfs = dfs_task_queue[dfs_level].front();
    dfs_task_queue[dfs_level].pop_front();

    current_dfs_level = dfs_level;
    load_balance();

    DFS_CODE.push(dfs.from, dfs.to, dfs.fromlabel, dfs.elabel, dfs.tolabel);
    DEBUG(*(graph_miner::logger), "*****regenerating embeddings for code: " << DFS_CODE.to_string() );

    // Extend every existing embedding by the edge described by `dfs`.
    Projected new_root;
    for(unsigned int emb = 0; emb < projected.size(); ++emb) {
      unsigned int id = projected[emb].id;
      PDFS *cur = &projected[emb];
      History history(graph, cur);

      if(!dfs.is_backward()) {
        // Forward tuple: any number of matching edges may exist.
        types::EdgeList edges;
        if(get_forward(graph, DFS_CODE, history, edges)) {
          types::EdgeList::iterator it = edges.begin();
          for(; it != edges.end(); ++it) {
            new_root.push(id, *it, cur);
          }
        }
      } else {
        // Backward tuple: at most one matching edge is returned.
        Edge *back = get_backward(graph, DFS_CODE, history);
        if(back) {
          new_root.push(id, back, cur);
        }
      }
    }

    if(dfs_level < embeddings_regeneration_level) {
      regenerate_embeddings(new_root, dfs_level + 1);
    } else {
      // Regeneration has ended; continue with regular extensions.
      // embeddings_regeneration_level is left untouched.
      project(new_root, dfs_level + 1);
    }

    DFS_CODE.pop();
  }
}
bool graph_miner_mpi_omp_hybrid::project_is_min(int thread_id, Projected &projected) { const RMPath& rmpath = DFS_CODE_IS_MIN_V[thread_id].buildRMPath(); int minlabel = DFS_CODE_IS_MIN_V[thread_id][0].fromlabel; int maxtoc = DFS_CODE_IS_MIN_V[thread_id][rmpath[0]].to; // SUBBLOCK 1 { Projected_map1 root; bool flg = false; int newto = 0; for(int i = rmpath.size() - 1; !flg && i >= 1; --i) { for(unsigned int n = 0; n < projected.size(); ++n) { PDFS *cur = &projected[n]; History history(GRAPH_IS_MIN_V[thread_id], cur); Edge *e = get_backward(GRAPH_IS_MIN_V[thread_id], history[rmpath[i]], history[rmpath[0]], history); if(e) { root[e->elabel].push(0, e, cur); newto = DFS_CODE_IS_MIN_V[thread_id][rmpath[i]].from; flg = true; } // if e } // for n } // for i if(flg) { Projected_iterator1 elabel = root.begin(); DFS_CODE_IS_MIN_V[thread_id].push(maxtoc, newto, -1, elabel->first, -1); if(DFS_CODE_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1] != DFS_CODE_IS_MIN_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1]) return false; return project_is_min(thread_id, elabel->second); } } // SUBBLOCK 1 // SUBBLOCK 2 { bool flg = false; int newfrom = 0; Projected_map2 root; types::EdgeList edges; for(unsigned int n = 0; n < projected.size(); ++n) { PDFS *cur = &projected[n]; History history(GRAPH_IS_MIN_V[thread_id], cur); if(get_forward_pure(GRAPH_IS_MIN_V[thread_id], history[rmpath[0]], minlabel, history, edges)) { flg = true; newfrom = maxtoc; for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it) root[(*it)->elabel][GRAPH_IS_MIN_V[thread_id][(*it)->to].label].push(0, *it, cur); } // if get_forward_pure } // for n for(int i = 0; !flg && i < (int)rmpath.size(); ++i) { for(unsigned int n = 0; n < projected.size(); ++n) { PDFS *cur = &projected[n]; History history(GRAPH_IS_MIN_V[thread_id], cur); if(get_forward_rmpath(GRAPH_IS_MIN_V[thread_id], history[rmpath[i]], minlabel, history, edges)) { flg = true; newfrom = 
DFS_CODE_IS_MIN_V[thread_id][rmpath[i]].from; for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it) root[(*it)->elabel][GRAPH_IS_MIN_V[thread_id][(*it)->to].label].push(0, *it, cur); } // if get_forward_rmpath } // for n } // for i if(flg) { Projected_iterator2 elabel = root.begin(); Projected_iterator1 tolabel = elabel->second.begin(); DFS_CODE_IS_MIN_V[thread_id].push(newfrom, maxtoc + 1, -1, elabel->first, tolabel->first); if(DFS_CODE_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1] != DFS_CODE_IS_MIN_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1]) return false; return project_is_min(thread_id, tolabel->second); } // if(flg) } // SUBBLOCK 2 return true; } // graph_miner::project_is_min