// Builds the candidate extensions for every occurrence in `legoccurrencesdata`.
//
// For each occurrence, every edge leaving the occurrence's target node (except
// the edge leading back to the occurrence's `fromnodeid`) is classified with
// nocycle ():
//   * result 0      -> the edge is appended to candidatelegsoccurrences[edgelabel],
//                      but only if edgelabel >= minlabel and edgelabel != neglect;
//   * result != 0   -> the edge is appended to candidatecloselegsoccs[number][edgelabel],
//                      unless number - 1 equals the `tonode` of the first edge of the
//                      last node in graphstate.
//
// Parameters:
//   legoccurrencesdata - occurrence list being extended; becomes `parent` of
//                        every candidate list.
//   minlabel           - smallest edge label accepted for a new (non-closing) leg.
//   neglect            - edge label that is never accepted for a new leg.
//
// The results are written to candidatelegsoccurrences / candidatecloselegsoccs,
// which are declared outside this function; nothing is returned.
void extend ( LegOccurrences &legoccurrencesdata, EdgeLabel minlabel, EdgeLabel neglect ) {
  // we're trying hard to avoid repeated destructor/constructor calls for complex types like vectors:
  // the candidate lists are emptied with resize ( 0 ) and refilled instead of being rebuilt.
  vector<LegOccurrence> &legoccurrences = legoccurrencesdata.elements;

  // lastself[label] is the tid for which a self-join on `label` was counted last,
  // so each tid contributes at most once per run to `selfjoin`.
  // A vector is used instead of a variable-length array: VLAs are a compiler
  // extension rather than ISO C++, and their stack usage grows with the number
  // of labels. The entries are Tid values, so they are stored as Tid.
  vector<Tid> lastself ( candidatelegsoccurrences.size (), NOTID );

  for ( unsigned int i = 0; i < candidatelegsoccurrences.size (); i++ ) {
    candidatelegsoccurrences[i].elements.resize ( 0 );
    candidatelegsoccurrences[i].parent = &legoccurrencesdata;
    candidatelegsoccurrences[i].number = legoccurrencesdata.number + 1;
    candidatelegsoccurrences[i].maxdegree = 0;
    candidatelegsoccurrences[i].selfjoin = 0;
    candidatelegsoccurrences[i].frequency = 0;
  }

  closelegsoccsused = false; // we are lazy with the initialization of close leg arrays, as we may not need them at all in
                             // many cases
  for ( OccurrenceId i = 0; i < legoccurrences.size (); i++ ) {
    LegOccurrence &legocc = legoccurrences[i];
    DatabaseTreePtr tree = database.trees[legocc.tid];
    DatabaseTreeNode &node = tree->nodes[legocc.tonodeid];
    for ( unsigned int j = 0; j < node.edges.size (); j++ ) {
      // skip the edge we arrived through
      if ( node.edges[j].tonode != legocc.fromnodeid ) {
        EdgeLabel edgelabel = node.edges[j].edgelabel;
        int number = nocycle ( tree, node, node.edges[j].tonode, i, &legoccurrencesdata );
        if ( number == 0 ) {
          if ( edgelabel >= minlabel && edgelabel != neglect ) {
            vector<LegOccurrence> &candidatelegsoccs = candidatelegsoccurrences[edgelabel].elements;
            if ( candidatelegsoccs.empty () )
              candidatelegsoccurrences[edgelabel].frequency++;
            else {
              // frequency counts tid changes between consecutive entries
              if ( candidatelegsoccs.back ().tid != legocc.tid )
                candidatelegsoccurrences[edgelabel].frequency++;
              // a second leg with the same label on the same parent occurrence:
              // count the tid once towards the self-join frequency
              if ( candidatelegsoccs.back ().occurrenceid == i &&
                   lastself[edgelabel] != legocc.tid ) {
                lastself[edgelabel] = legocc.tid;
                candidatelegsoccurrences[edgelabel].selfjoin++;
              }
            }
            candidatelegsoccs.push_back ( LegOccurrence ( legocc.tid, i, node.edges[j].tonode, legocc.tonodeid ) );
            setmax ( candidatelegsoccurrences[edgelabel].maxdegree, database.trees[legocc.tid]->nodes[node.edges[j].tonode].edges.size () );
          }
        }
        else if ( number - 1 != graphstate.nodes.back().edges[0].tonode ) {
          candidateCloseLegsAllocate ( number, legoccurrencesdata.number + 1 );

          vector<CloseLegOccurrence> &candidatelegsoccs = candidatecloselegsoccs[number][edgelabel].elements;
          if ( !candidatelegsoccs.size () || candidatelegsoccs.back ().tid != legocc.tid )
            candidatecloselegsoccs[number][edgelabel].frequency++;
          candidatelegsoccs.push_back ( CloseLegOccurrence ( legocc.tid, i ) );
          // NOTE(review): this updates maxdegree of the *ordinary* candidate list for
          // edgelabel, not of the close-leg list, and without the minlabel/neglect
          // filter applied above - kept as in the original; confirm it is intended.
          setmax ( candidatelegsoccurrences[edgelabel].maxdegree, database.trees[legocc.tid]->nodes[node.edges[j].tonode].edges.size () );
        }
      }
    }
  }
}
// Self-join: combines a leg occurrence list with itself.
//
// Consecutive entries of `legoccsdata.elements` that share the same
// occurrenceid form a group. For every ordered pair (first, second) of
// distinct positions inside a group, one occurrence is emitted whose
// occurrenceid is `first` and whose node ids are taken from entry `second`.
//
// Returns NULL when legoccsdata.selfjoin is below minfreq; otherwise a pointer
// to the shared `legoccurrences` object, which this call overwrites.
LegOccurrencesPtr join ( LegOccurrences &legoccsdata ) {
  if ( legoccsdata.selfjoin < minfreq )
    return NULL;

  vector<LegOccurrence> &source = legoccsdata.elements;

  // reset the shared result buffer
  legoccurrences.elements.resize ( 0 );
  legoccurrences.maxdegree = 0;
  legoccurrences.selfjoin = 0;

  Tid countedtid = NOTID;     // tid most recently added to the result's selfjoin count
  OccurrenceId groupend = 0;  // one past the last entry of the current group

  do {
    const OccurrenceId groupbegin = groupend;
    LegOccurrence &head = source[groupbegin];

    // move groupend past all entries that share head's occurrenceid
    groupend++;
    while ( groupend < source.size () &&
            source[groupend].occurrenceid == head.occurrenceid )
      groupend++;

    // emit every ordered pair of distinct entries of the group
    for ( OccurrenceId first = groupbegin; first < groupend; first++ ) {
      for ( OccurrenceId second = groupbegin; second < groupend; second++ ) {
        if ( first == second )
          continue;
        legoccurrences.elements.push_back (
          LegOccurrence ( head.tid, first, source[second].tonodeid, source[second].fromnodeid ) );
        setmax ( legoccurrences.maxdegree,
                 database.trees[head.tid]->nodes[source[second].tonodeid].edges.size () );
      }
    }

    // groups with more than two entries count once per tid towards the
    // self-join frequency of the result
    if ( ( groupend - groupbegin > 2 ) && head.tid != countedtid ) {
      countedtid = head.tid;
      legoccurrences.selfjoin++;
    }
  }
  while ( groupend < source.size () );

  // no frequency check here: the self-join frequency was computed exactly
  // while the input list was built, so it is simply copied over.
  legoccurrences.parent = &legoccsdata;
  legoccurrences.number = legoccsdata.number + 1;
  legoccurrences.frequency = legoccsdata.selfjoin;
  return &legoccurrences;
}
// This function is on the critical path. Its efficiency is MOST important.
//
// Joins two leg occurrence lists on their occurrenceid and writes the combined
// occurrences into the shared `legoccurrences` object.
//
// Parameters:
//   legoccsdata1   - first list; it becomes the `parent` of the result, and
//                    positions inside it become the occurrenceid of the
//                    emitted elements.
//   connectingnode - node looked up in graphstate; if its degree already
//                    equals its maximum degree, NULL is returned immediately.
//   legoccsdata2   - second list; supplies tonodeid / fromnodeid of the
//                    emitted elements.
//
// Returns a pointer to `legoccurrences` when the computed frequency is at
// least minfreq, NULL otherwise. The returned object is overwritten by the
// next call that fills `legoccurrences`.
//
// NOTE(review): both indices only ever move forward, so both lists are
// presumably sorted by occurrenceid - confirm against the code that builds them.
// NOTE(review): the first loop reads legoccs2[k] with k == 0 before any bounds
// check on k, so legoccsdata2 is assumed to be non-empty - confirm with callers.
LegOccurrencesPtr join ( LegOccurrences &legoccsdata1, NodeId connectingnode, LegOccurrences &legoccsdata2 ) {
  if ( graphstate.getNodeDegree ( connectingnode ) == graphstate.getNodeMaxDegree ( connectingnode ) ) 
    return NULL;

  Frequency frequency = 0;
  Tid lasttid = NOTID;   // tid most recently counted in `frequency`
  vector<LegOccurrence> &legoccs1 = legoccsdata1.elements, &legoccs2 = legoccsdata2.elements;
  // reset the shared result buffer
  legoccurrences.elements.resize ( 0 );
  legoccurrences.maxdegree = 0;
  legoccurrences.selfjoin = 0;
  //legoccurrences.elements.reserve ( legoccs1.size () * 2 ); // increased memory usage, and speed!
  // j walks legoccs1, k walks legoccs2; m and l remember where the current
  // matching run starts in legoccs1 and legoccs2 respectively
  OccurrenceId j = 0, k = 0, l, m;
  unsigned int legoccs1size = legoccs1.size (), legoccs2size = legoccs2.size (); // this increases speed CONSIDERABLY!
  Tid lastself = NOTID;  // tid most recently counted in legoccurrences.selfjoin

  do {
    // skip entries of list 1 whose occurrenceid is smaller than the current one of list 2
    while ( j < legoccs1size && legoccs1[j].occurrenceid < legoccs2[k].occurrenceid ) {
      j++;
    }
    if ( j < legoccs1size ) {
      LegOccurrence &jlegocc = legoccs1[j];
      // skip entries of list 2 whose occurrenceid is smaller than the current one of list 1
      while ( k < legoccs2size && legoccs2[k].occurrenceid < jlegocc.occurrenceid ) {
        k++;
      }
      if ( k < legoccs2size ) {
        if ( legoccs2[k].occurrenceid == jlegocc.occurrenceid ) {
          // both lists have entries for this occurrenceid:
          // [m, j) becomes the matching run in list 1 ...
          m = j;
          do {
            j++;
          }
          while ( j < legoccs1size && legoccs1[j].occurrenceid == jlegocc.occurrenceid );
          // ... and [l, k) the matching run in list 2
          l = k;
          do {
            k++;
          }
          while ( k < legoccs2size && legoccs2[k].occurrenceid == jlegocc.occurrenceid );
	  bool add = false;   // set once any pair of this run has been emitted
          for ( OccurrenceId m2 = m; m2 < j; m2++ ) {
            int d = 0;        // number of elements emitted for list-1 entry m2
            for ( OccurrenceId l2 = l; l2 < k; l2++ ) {
	      NodeId tonodeid = legoccs2[l2].tonodeid;
              // only pair entries that lead to different nodes
              if ( legoccs1[m2].tonodeid !=  tonodeid ) {
                legoccurrences.elements.push_back ( LegOccurrence ( jlegocc.tid, m2, tonodeid, legoccs2[l2].fromnodeid ) );
                setmax ( legoccurrences.maxdegree, database.trees[jlegocc.tid]->nodes[tonodeid].edges.size () );
		add = true;
		d++;
              }
            }
            // more than one partner for the same list-1 entry: count the tid
            // once towards the self-join frequency of the result
	    if ( d > 1 && jlegocc.tid != lastself ) {
	      lastself = jlegocc.tid;
	      legoccurrences.selfjoin++;
	    }
	  }
	  	  
          // count the tid once if this run emitted anything
	  if ( jlegocc.tid != lasttid && add ) {
            lasttid = jlegocc.tid;
	    frequency++;
	  }

          // list 2 exhausted: stop here, which also keeps the next iteration
          // from reading legoccs2[k] out of range
          if ( k == legoccs2size )
            break;
        }
      }
      else
        break;
    }
    else
      break;
  }
  while ( true );

  if ( frequency >= minfreq ) {
    legoccurrences.parent = &legoccsdata1;
    legoccurrences.number = legoccsdata1.number + 1;
    legoccurrences.frequency = frequency;
    return &legoccurrences;
  }
  else
    return NULL;
}
// Example no. 4 (0)
void Database::reorder () {
    

    // PHASE I: LABEL EDGES ACCORDING TO FREQUENCY
  //  cerr << "REORDER" << endl;


    // gather frequent edgelabels and sort according to frequency
    edgelabelsindexes.reserve ( edgelabels.size () );
    for (unsigned int i = 0; i < edgelabels.size (); i++ ) {
        if ( edgelabels[i].frequency >= fm::minfreq )
            edgelabelsindexes.push_back ( i );                                                              
    }

  //each(edgelabelsindexes) {
  //      cerr << (int) edgelabelsindexes[i] << " (" 
  //         << nodelabels[edgelabels[edgelabelsindexes[i]].fromnodelabel].inputlabel
  //         << " => "
  //         << nodelabels[edgelabels[edgelabelsindexes[i]].tonodelabel].inputlabel
  //         << ")" << endl;
  //}
  //cerr << endl;

    sort ( edgelabelsindexes.begin (), edgelabelsindexes.end (), EdgeLabelsIndexesSort ( edgelabels ) );

  //each(edgelabelsindexes) {
  //    cerr << (int) edgelabelsindexes[i] << " (" 
  //         << nodelabels[edgelabels[edgelabelsindexes[i]].fromnodelabel].inputlabel
  //         << " => "
  //         << nodelabels[edgelabels[edgelabelsindexes[i]].tonodelabel].inputlabel
  //         << ")" << endl;
  //}
  //cerr << endl;


    // Now, use the ranked indices to re-number frequent edge labels with their rank
    for (unsigned int i = 0; i < edgelabelsindexes.size (); i++ ) {
        edgelabels[edgelabelsindexes[i]].edgelabel = i;                 // fill in the edge labels for the first time
//     #define DEBUG
     #ifdef DEBUG
    DatabaseEdgeLabel &label = edgelabels[edgelabelsindexes[i]];
    cout << (int) nodelabels[label.tonodelabel].inputlabel 
         << "[" << (int) label.inputedgelabel << "]" 
	 << (int) nodelabels[label.fromnodelabel].inputlabel <<"-->" << i <<endl;
     #endif                                                                   // the edgelabel is the rank found by REORDER
    }


    // PHASE II: REMOVE INFREQUENT EDGES FROM NODES
//    cerr << "REMOVE" << endl;

    for ( Tid i = 0; i < trees.size (); i++ ) {
        DatabaseTree &tree = * (trees[i]);                                          // for every tree i...
        for ( NodeId j = 0; j < tree.nodes.size (); j++ ) {                         // for every node j...
  //        cerr << endl;
            DatabaseTreeNode &node = tree.nodes[j];
            if ( nodelabels[node.nodelabel].frequency >= fm::minfreq ) {                  // ...check its frequency...
                DatabaseNodeLabel &nodelabel = nodelabels[node.nodelabel];

  //            cerr << "Leg Occurence for node " << nodelabel.inputlabel
  //                 << ": " << tree.tid << " " << nodelabel.occurrences.elements.size() << " " << j << " " << NONODE << endl;
                nodelabel.occurrences.elements.push_back ( LegOccurrence ( tree.tid, (OccurrenceId) nodelabel.occurrences.elements.size (), j, NONODE )  );
                                                                                        // ...and push occurence in database
                int k = 0;
  //            cerr << "node " << (int) node.nodelabel  << " (" << nodelabels[node.nodelabel].inputlabel << ")"  << endl;
                for ( int l = 0; l < node.edges.size (); l++ ) {                        // For each edge l going out of j...
                    
                                        
                    EdgeLabel lab = node.edges[l].edgelabel;                            // ... (with label lab)...
                    if ( edgelabels[lab].frequency >= fm::minfreq ) {                       // ... check its frequency...

  //                    DatabaseTreeEdge& edge = node.edges[l];
  //                    cerr << "  edge " << (int) edge.edgelabel << " moved from " << l << " to " << k << endl;

                        node.edges[k].edgelabel = edgelabels[lab].edgelabel;            // ... and overwrite old edges
                        node.edges[k].tonode = node.edges[l].tonode;
                        //node.edges[k].bond = node.edges[l].bond;
                        k++;
                    }
                }
  //            cerr << "Truncating " << node.edges.size() - k << " edges" << endl;
                node.edges.resize ( k );                                                // truncate the rest
            }
            else node.edges.clear ();                                                   // truncate all edges
        }
    }


}