/**
 * Tallies the entries of `projected` per `id` and returns the sum of all
 * tallies.
 *
 * @param projected  embedding list to inspect (not modified).
 * @return           sum of the per-id entry counts.
 */
int gSpan::support_counts(Projected& projected)
{
    // id -> number of entries in `projected` carrying that id
    std::map<UINT,UINT> per_id;

    Projected::iterator emb = projected.begin();
    while(emb != projected.end())
    {
        ++per_id[emb->id];
        ++emb;
    }

    int sum = 0;
    for(std::map<UINT,UINT>::iterator entry = per_id.begin(); entry != per_id.end(); ++entry)
        sum += entry->second;

    return sum;
}
/**
 * Collects the distinct `id` values that appear in `projected`.
 *
 * @param projected  embedding list to inspect (not modified).
 * @return           set holding every id seen at least once.
 */
std::set<int> gSpan::total_occurance(Projected& projected) {
    std::set<int> ids;

    Projected::iterator emb = projected.begin();
    while(emb != projected.end())
    {
        ids.insert(emb->id);
        ++emb;
    }

    return ids;
}
/**
 * Counts how many times the `id` changes while walking `projected` from
 * front to back (the first entry counts as a change from the 0xffffffff
 * sentinel).
 *
 * NOTE(review): this equals the number of distinct ids only if entries with
 * the same id are stored contiguously -- confirm against how Projected is
 * filled.
 *
 * @param projected  embedding list to inspect (not modified).
 * @return           number of id changes observed.
 */
UINT gSpan::support (Projected& projected)
{
    UINT changes  = 0;
    UINT previous = 0xffffffff;   // sentinel: "no id seen yet"

    for(Projected::iterator emb = projected.begin(); emb != projected.end(); ++emb)
    {
        if(previous != emb->id)
        {
            ++changes;
            previous = emb->id;
        }
    }

    return changes;
}
Esempio n. 4
0
/**
 * Expands the pattern currently held in DFS_CODE by one edge in every
 * possible way and recurses into each extension.
 *
 * Steps, in order:
 *   1. return early when is_min() is false or the support of `projected`
 *      is below minimal_support;
 *   2. report() the pattern;
 *   3. for every embedding in `projected`, collect backward, pure-forward
 *      and rightmost-path forward extensions;
 *   4. enqueue one DFS tuple per extension in dfs_task_queue[dfs_level],
 *      calling load_balance() after each enqueue;
 *   5. drain dfs_task_queue[dfs_level], pushing each tuple onto DFS_CODE and
 *      recursing with dfs_level + 1.
 *
 * @param projected  embeddings of the current DFS code.
 * @param dfs_level  index into dfs_task_queue used for this recursion depth;
 *                   must be non-negative.
 */
void graph_miner_mpi_dyn::project(Projected &projected, int dfs_level)
{
  if(is_min() == false) {
    return;
  }

  // Check if the pattern is frequent enough.
  unsigned int sup = support(projected);

  if(sup < minimal_support) return;

  DEBUG(*logger, "DFS level = " << dfs_level);

  DEBUG(*(graph_miner::logger), "executing project for code: " << DFS_CODE.to_string() << "; support: " << sup);

  // Output the frequent substructure
  report(projected, sup);

  // In case we have a valid upper bound and our graph already exceeds it,
  // return.  Note: we do not check for equality as the DFS exploration may
  // still add edges within an existing subgraph, without increasing the
  // number of nodes.
  //
  //if(maxpat_max > maxpat_min && DFS_CODE.nodeCount() > maxpat_max) return;

  // We just outputted a frequent subgraph.  As it is frequent enough, so
  // might be its (n+1)-extension-graphs, hence we enumerate them all.
  const RMPath &rmpath = DFS_CODE.buildRMPath();
  int minlabel = DFS_CODE[0].fromlabel;
  int maxtoc = DFS_CODE[rmpath[0]].to;

  Projected_map3 new_fwd_root;
  Projected_map2 new_bck_root;
  types::EdgeList edges;

  current_dfs_level = dfs_level;

  // Enumerate all possible one edge extensions of the current substructure.
  for(unsigned int n = 0; n < projected.size(); ++n) {

    unsigned int id = projected[n].id;
    PDFS *cur = &projected[n];
    History history(graph, cur);

    // XXX: do we have to change something here for directed edges?

    // backward
    for(int i = (int)rmpath.size() - 1; i >= 1; --i) {
      Edge *e = get_backward(graph, history[rmpath[i]], history[rmpath[0]], history);
      if(e)
        new_bck_root[DFS_CODE[rmpath[i]].from][e->elabel].push(id, e, cur);
    }

    // pure forward
    // FIXME: here we pass a too large e->to (== history[rmpath[0]]->to
    // into get_forward_pure, such that the assertion fails.
    //
    // The problem is:
    // history[rmpath[0]]->to > graph.size()
    if(get_forward_pure(graph, history[rmpath[0]], minlabel, history, edges)) {
      for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it) {
        new_fwd_root[maxtoc][(*it)->elabel][graph[(*it)->to].label].push(id, *it, cur);
      }
    }

    // backtracked forward
    for(int i = 0; i < (int)rmpath.size(); ++i) {
      if(get_forward_rmpath(graph, history[rmpath[i]], minlabel, history, edges)) {
        for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it) {
          new_fwd_root[DFS_CODE[rmpath[i]].from][(*it)->elabel][graph[(*it)->to].label].push(id, *it, cur);
        } // for it
      } // if
    } // for i
  } // for n

  // Make sure dfs_task_queue[dfs_level] exists.  Growing by a single slot is
  // not enough when dfs_level is more than one past the current end, so keep
  // appending empty queues until the index is valid.
  while(dfs_level >= 0 && dfs_task_queue.size() <= static_cast<std::size_t>(dfs_level)) {
    dfs_task_queue.push_back(std::deque<types::DFS>());
  }

  // Test all extended substructures.
  // backward
  for(Projected_iterator2 to = new_bck_root.begin(); to != new_bck_root.end(); ++to) {
    for(Projected_iterator1 elabel = to->second.begin(); elabel != to->second.end(); ++elabel) {

      DFS dfs(maxtoc, to->first, -1, elabel->first, -1);
      dfs_task_queue[dfs_level].push_back(dfs);

      load_balance();

    }
  }

  // forward
  for(Projected_riterator3 from = new_fwd_root.rbegin();
      from != new_fwd_root.rend(); ++from) {
    for(Projected_iterator2 elabel = from->second.begin();
        elabel != from->second.end(); ++elabel) {
      for(Projected_iterator1 tolabel = elabel->second.begin();
          tolabel != elabel->second.end(); ++tolabel) {

        DFS dfs(from->first, maxtoc + 1, -1, elabel->first, tolabel->first);
        dfs_task_queue[dfs_level].push_back(dfs);

        load_balance();

      }
    }
  }

  // Drain this level's queue.  The queue is re-indexed on every iteration
  // (no reference is cached) because the recursive call below may append to
  // dfs_task_queue.
  while(dfs_task_queue[dfs_level].size() > 0) {

    DFS dfs = dfs_task_queue[dfs_level].front();
    dfs_task_queue[dfs_level].pop_front();
    DEBUG(*logger, "popped dfs = " << dfs.to_string() );

    // The recursion overwrites current_dfs_level, so restore it before
    // calling load_balance() for this level.
    current_dfs_level = dfs_level;
    load_balance();

    DFS_CODE.push(dfs.from, dfs.to, dfs.fromlabel, dfs.elabel, dfs.tolabel);

    if(dfs.is_backward())
      project(new_bck_root[dfs.to][dfs.elabel], dfs_level + 1);
    else
      project(new_fwd_root[dfs.from][dfs.elabel][dfs.tolabel], dfs_level + 1);

    DFS_CODE.pop();
  }

  return;
}
Esempio n. 5
0
/**
 * Rebuilds the embedding lists for the DFS tuples queued at `dfs_level`.
 *
 * Every tuple taken from dfs_task_queue[dfs_level] is pushed onto DFS_CODE,
 * its embeddings are recomputed from `projected`, and the result is handed
 * either to another regenerate_embeddings() round (while
 * embeddings_regeneration_level is still greater than dfs_level) or to
 * project().  No frequency or minimality check is made here.
 *
 * @param projected  embeddings of the code that precedes the queued tuples.
 * @param dfs_level  index into dfs_task_queue to drain.
 */
void graph_miner_mpi_dyn::regenerate_embeddings(Projected &projected, int dfs_level)
{
  DEBUG(*(graph_miner::logger), "DFS level inside regenerate embeddings = " << dfs_level << " queue size = " << dfs_task_queue[dfs_level].size());

  // Drain the queue of this level; the queue is re-indexed on each pass.
  while(!dfs_task_queue[dfs_level].empty()) {

    types::DFS dfs = dfs_task_queue[dfs_level].front();
    dfs_task_queue[dfs_level].pop_front();

    current_dfs_level = dfs_level;
    load_balance();

    DFS_CODE.push(dfs.from, dfs.to, dfs.fromlabel, dfs.elabel, dfs.tolabel);

    DEBUG(*(graph_miner::logger), "*****regenerating embeddings for code: " << DFS_CODE.to_string() );

    // Embeddings of the code extended by `dfs`.
    Projected new_root;

    for(unsigned int n = 0; n < projected.size(); ++n) {

      PDFS *parent = &projected[n];
      unsigned int gid = parent->id;
      History history(graph, parent);

      if(!dfs.is_backward()) {
        // Forward tuple: one new embedding per matching edge.
        types::EdgeList edges;
        if(get_forward(graph, DFS_CODE, history, edges)) {
          types::EdgeList::iterator it = edges.begin();
          while(it != edges.end()) {
            new_root.push(gid, *it, parent);
            ++it;
          }
        }
        continue;
      }

      // Backward tuple: at most one matching edge.
      Edge *e = get_backward(graph, DFS_CODE, history);
      if(e)
        new_root.push(gid, e, parent);
    }

    if(dfs_level < embeddings_regeneration_level) {
      regenerate_embeddings(new_root, dfs_level + 1);
    } else {
      // Regeneration is finished; continue with regular extension.
      project(new_root, dfs_level + 1);
    }

    DFS_CODE.pop();
  }

  return;
}
// Appends one DFS tuple (from, to, fromlabel, elabel, tolabel) to a flat
// list that stores five ints per candidate.
static void append_dfs_tuple(std::vector<int>& tuples,
                             int from, int to, int fromlabel, int elabel, int tolabel)
{
    tuples.push_back(from);
    tuples.push_back(to);
    tuples.push_back(fromlabel);
    tuples.push_back(elabel);
    tuples.push_back(tolabel);
}

/**
 * Beam-limited expansion of the pattern currently held in DFS_CODE.
 *
 * The call returns early when the support of `projected` is below minsup,
 * when is_min() is false, when the pattern graph exceeds maxVertices /
 * maxEdges, or when the maxpat_max bound is exceeded.  Otherwise all
 * one-edge extensions are collected; backward extensions are handled first,
 * then forward extensions.  Within each group the candidates are scored
 * (Frequency, SET_COVER), ordered with SetPairComp, and only the first
 * BeamWidth of them are appended to BestList and expanded recursively.
 *
 * Candidates are referenced in place inside the extension maps instead of
 * being copied, and a non-positive BeamWidth expands nothing (the previous
 * signed/unsigned comparison turned a negative BeamWidth into an
 * out-of-range loop bound).
 *
 * @param projected  embeddings of the current DFS code.
 */
void gSpan::project(Projected& projected)
{
    UINT sup = support(projected);
    std::cout << "Support: " << sup << std::endl;
    if(sup<minsup)
        return;

    if(!is_min())
        return;

    Graph g(false);
    DFS_CODE.toGraph(g);
    if(g.size() > maxVertices || g.edge_size()> maxEdges)
        return;

    //report(projected,sup);
    ID++;

    if(maxpat_max>maxpat_min && DFS_CODE.nodeCount() > maxpat_max)
        return;

    const RMPath& rmpath = DFS_CODE.buildRMPath();
    int minlabel = DFS_CODE[0].fromlabel;
    int maxtoc = DFS_CODE[rmpath[0]].to;

    Projected_map3 new_fwd_root;
    Projected_map2 new_bck_root;
    EdgeList edges;

    // Collect every one-edge extension of every embedding.
    for(UINT n = 0; n < projected.size(); ++n)
    {
        UINT id = projected[n].id;
        PDFS* cur = & projected[n];
        History history(TRANS[id],cur);

        // backward extensions
        for(int i = (int) rmpath.size()-1; i>=1 ; --i) {
            Edge* e = get_backward(TRANS[id], history[rmpath[i]],history[rmpath[0]],history);

            if(e)
                new_bck_root[DFS_CODE[rmpath[i]].from][e->elabel].push(id,e,cur);
        }

        // pure forward extensions
        if(get_forward_pure(TRANS[id], history[rmpath[0]], minlabel, history, edges))
            for(EdgeList::iterator it = edges.begin(); it != edges.end(); ++it)
                new_fwd_root[maxtoc][(*it)->elabel][TRANS[id][(*it)->to].label].push(id,*it,cur);

        // forward extensions along the rightmost path
        for(int i=0; i<(int)rmpath.size(); ++i)
            if(get_forward_rmpath(TRANS[id],history[rmpath[i]],minlabel,history,edges))
                for(EdgeList::iterator it = edges.begin(); it!=edges.end(); ++it)
                    new_fwd_root[DFS_CODE[rmpath[i]].from][(*it)->elabel][TRANS[id][(*it)->to].label].push(id,*it,cur);

    }

    // Candidate k is described by tuples[5*k .. 5*k+4] and *embeddings[k].
    // The pointers refer to values stored in the std::map objects above,
    // which stay alive and are not modified for the rest of this call.
    std::vector<int> tuples;
    std::vector<Projected*> embeddings;
    std::vector<std::pair<Graph,int> > sorted;

    // ---- backward candidates -------------------------------------------
    for(Projected_iterator2 to = new_bck_root.begin(); to!=new_bck_root.end(); ++to) {
        for(Projected_iterator1 elabel= to->second.begin(); elabel!=to->second.end(); ++elabel) {
            append_dfs_tuple(tuples, maxtoc, to->first, -1, elabel->first, -1);
            embeddings.push_back(&elabel->second);
        }
    }

    for(UINT i = 0; i < embeddings.size(); ++i)
    {
        const int* t = &tuples[5*i];
        DFS_CODE.push(t[0],t[1],t[2],t[3],t[4]);
        Graph candidate(directed);
        DFS_CODE.toGraph(candidate);

        candidate.Frequency = support(*embeddings[i]);
        candidate.SET_COVER = (double)candidate.Frequency / (double) TRANS.size();
        sorted.push_back(std::make_pair(candidate,(int)i));
        DFS_CODE.pop();
    }

    std::sort(sorted.begin(),sorted.end(),SetPairComp);

    UINT limit = 0;
    if(BeamWidth > 0)
        limit = std::min((UINT)sorted.size(),(UINT)BeamWidth);

    for(UINT i = 0; i < limit; ++i) {
        const int pick = sorted[i].second;
        const int* t = &tuples[5*pick];
        DFS_CODE.push(t[0],t[1],t[2],t[3],t[4]);
        BestList.push_back(sorted[i].first);
        project(*embeddings[pick]);
        DFS_CODE.pop();
    }

    sorted.clear();
    tuples.clear();
    embeddings.clear();

    // ---- forward candidates --------------------------------------------
    for(Projected_riterator3 from = new_fwd_root.rbegin(); from!=new_fwd_root.rend(); ++from)
    {
        for(Projected_iterator2 elabel = from->second.begin(); elabel != from->second.end(); ++elabel)
        {
            for(Projected_iterator1 tolabel = elabel->second.begin(); tolabel!=elabel->second.end(); ++tolabel)
            {
                embeddings.push_back(&tolabel->second);
                append_dfs_tuple(tuples, from->first, maxtoc+1, -1, elabel->first, tolabel->first);
            }
        }
    }

    for(UINT i = 0; i < embeddings.size(); ++i)
    {
        const int* t = &tuples[5*i];
        DFS_CODE.push(t[0],t[1],t[2],t[3],t[4]);
        Graph candidate(directed);
        DFS_CODE.toGraph(candidate);

        candidate.Frequency = support(*embeddings[i]);
        candidate.SET_COVER = (double)candidate.Frequency / (double) TRANS.size();
        sorted.push_back(std::make_pair(candidate,(int)i));
        DFS_CODE.pop();
    }

    std::sort(sorted.begin(),sorted.end(),SetPairComp);

    limit = 0;
    if(BeamWidth > 0)
        limit = std::min((UINT)sorted.size(),(UINT)BeamWidth);

    for(UINT i = 0; i < limit; ++i) {
        const int pick = sorted[i].second;
        const int* t = &tuples[5*pick];
        DFS_CODE.push(t[0],t[1],t[2],t[3],t[4]);
        BestList.push_back(sorted[i].first);
        project(*embeddings[pick]);
        DFS_CODE.pop();
    }

    return;
}
/**
 * Writes the pattern currently held in DFS_CODE to *os and advances ID.
 *
 * Nothing is written (and ID is left unchanged) when the node count is
 * outside the [maxpat_min, maxpat_max] limits checked below.
 *
 * Output layout:
 *   - `where` set:  a "t # <ID> <sup>" header line, the pattern, then an
 *     "x " prefix followed by the ids found in `projected` (an id is written
 *     each time it differs from the previous entry's id);
 *   - `enc` unset:  the pattern is written as a Graph via Graph::write;
 *   - `enc` set:    the pattern is written via DFS_CODE.write.
 *
 * @param projected  embeddings of the pattern; only read when `where` is set.
 * @param sup        support value printed alongside the pattern.
 */
void gSpan::report(Projected& projected,UINT sup)
{
    // Size filters.
    if(maxpat_max > maxpat_min) {
        if(DFS_CODE.nodeCount() > maxpat_max)
            return;
    }
    if(maxpat_min > 0) {
        if(DFS_CODE.nodeCount() < maxpat_min)
            return;
    }

    if(where)
        *os << "t # " << ID << " " << sup << endl;

    if(enc) {
        // Encoded form: the DFS code itself, bracketed unless `where` is set.
        if(!where)
            *os << "<" << ID << "> " << sup << " [";
        DFS_CODE.write (*os);

        if(!where)
            *os << "]";
    } else {
        // Graph form.
        Graph pattern(directed);
        DFS_CODE.toGraph(pattern);

        if(!where)
            *os << "t # " << ID << " * " << sup;

        pattern.write(*os);
    }

    if(where) {
        *os << "x ";
        UINT previous = 0xffffffff;   // sentinel: "no id written yet"
        for(UINT n = 0; n < projected.size(); ++n) {
            if(previous != projected[n].id) {
                if(n != 0)
                    *os << " ";
                // NOTE(review): fos receives the separator even for the first
                // id, unlike *os -- confirm whether that is intended.
                fos << " ";
                *os << projected[n].id;
                fos << projected[n].id;
            }
            previous = projected[n].id;
        }
    }

    *os << "\n";
    ++ID;
}
Esempio n. 8
0
bool graph_miner_mpi_omp_hybrid::project_is_min(int thread_id, Projected &projected)
{
  const RMPath& rmpath = DFS_CODE_IS_MIN_V[thread_id].buildRMPath();
  int minlabel         = DFS_CODE_IS_MIN_V[thread_id][0].fromlabel;
  int maxtoc           = DFS_CODE_IS_MIN_V[thread_id][rmpath[0]].to;


  // SUBBLOCK 1
  {
    Projected_map1 root;
    bool flg = false;
    int newto = 0;

    for(int i = rmpath.size() - 1; !flg  && i >= 1; --i) {
      for(unsigned int n = 0; n < projected.size(); ++n) {
        PDFS *cur = &projected[n];
        History history(GRAPH_IS_MIN_V[thread_id], cur);
        Edge *e = get_backward(GRAPH_IS_MIN_V[thread_id], history[rmpath[i]], history[rmpath[0]], history);
        if(e) {
          root[e->elabel].push(0, e, cur);
          newto = DFS_CODE_IS_MIN_V[thread_id][rmpath[i]].from;
          flg = true;
        } // if e
      } // for n
    } // for i

    if(flg) {
      Projected_iterator1 elabel = root.begin();
      DFS_CODE_IS_MIN_V[thread_id].push(maxtoc, newto, -1, elabel->first, -1);
      if(DFS_CODE_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1] != DFS_CODE_IS_MIN_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1]) return false;
      return project_is_min(thread_id, elabel->second);
    }
  } // SUBBLOCK 1

  // SUBBLOCK 2
  {
    bool flg = false;
    int newfrom = 0;
    Projected_map2 root;
    types::EdgeList edges;

    for(unsigned int n = 0; n < projected.size(); ++n) {
      PDFS *cur = &projected[n];
      History history(GRAPH_IS_MIN_V[thread_id], cur);
      if(get_forward_pure(GRAPH_IS_MIN_V[thread_id], history[rmpath[0]], minlabel, history, edges)) {
        flg = true;
        newfrom = maxtoc;
        for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it)
          root[(*it)->elabel][GRAPH_IS_MIN_V[thread_id][(*it)->to].label].push(0, *it, cur);
      } // if get_forward_pure
    } // for n

    for(int i = 0; !flg && i < (int)rmpath.size(); ++i) {
      for(unsigned int n = 0; n < projected.size(); ++n) {
        PDFS *cur = &projected[n];
        History history(GRAPH_IS_MIN_V[thread_id], cur);
        if(get_forward_rmpath(GRAPH_IS_MIN_V[thread_id], history[rmpath[i]], minlabel, history, edges)) {
          flg = true;
          newfrom = DFS_CODE_IS_MIN_V[thread_id][rmpath[i]].from;
          for(types::EdgeList::iterator it = edges.begin(); it != edges.end(); ++it)
            root[(*it)->elabel][GRAPH_IS_MIN_V[thread_id][(*it)->to].label].push(0, *it, cur);
        } // if get_forward_rmpath
      } // for n
    } // for i

    if(flg) {
      Projected_iterator2 elabel  = root.begin();
      Projected_iterator1 tolabel = elabel->second.begin();
      DFS_CODE_IS_MIN_V[thread_id].push(newfrom, maxtoc + 1, -1, elabel->first, tolabel->first);
      if(DFS_CODE_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1] != DFS_CODE_IS_MIN_V[thread_id][DFS_CODE_IS_MIN_V[thread_id].size() - 1]) return false;
      return project_is_min(thread_id, tolabel->second);
    } // if(flg)
  } // SUBBLOCK 2

  return true;
} // graph_miner::project_is_min