// Fills the candidate extension lists for the pattern whose occurrences are
// stored in legoccurrencesdata.
//
// All slots of candidatelegsoccurrences are reset first (element buffers are
// emptied with resize(0) rather than rebuilt, so previously allocated memory
// is reused). Then, for every occurrence and every edge leaving its `tonodeid`
// that does not lead back to its `fromnodeid`, nocycle() is consulted:
//   - result 0: the edge is appended as a candidate leg under its edge label,
//     provided that label is >= minlabel and differs from neglect;
//   - any other result `number`, unless number - 1 equals the tonode of the
//     first edge of graphstate's last node: the edge is appended as a close-leg
//     candidate to candidatecloselegsoccs[number][edgelabel].
//
// legoccurrencesdata  occurrence list being extended; set as parent of every
//                     candidate list
// minlabel            smallest edge label accepted for a candidate leg
// neglect             edge label that is never accepted for a candidate leg
void extend ( LegOccurrences &legoccurrencesdata, EdgeLabel minlabel, EdgeLabel neglect ) {
  vector<LegOccurrence> &occs = legoccurrencesdata.elements;

  // lastself[label] remembers the tid that was last counted in that label's
  // selfjoin, so each tid is counted at most once in a row.
  // This used to be a variable-length array, which is a compiler extension
  // (not ISO C++) and lives on the stack with a size that depends on the
  // number of labels. A vector is portable; it costs one allocation per call.
  vector<int> lastself ( candidatelegsoccurrences.size (), NOTID );

  for ( unsigned int i = 0; i < candidatelegsoccurrences.size (); i++ ) {
    candidatelegsoccurrences[i].elements.resize ( 0 );
    candidatelegsoccurrences[i].parent = &legoccurrencesdata;
    candidatelegsoccurrences[i].number = legoccurrencesdata.number + 1;
    candidatelegsoccurrences[i].maxdegree = 0;
    candidatelegsoccurrences[i].selfjoin = 0;
    candidatelegsoccurrences[i].frequency = 0;
  }

  // we are lazy with the initialization of close leg arrays, as we may not
  // need them at all in many cases
  closelegsoccsused = false;

  for ( OccurrenceId i = 0; i < occs.size (); i++ ) {
    LegOccurrence &legocc = occs[i];
    DatabaseTreePtr tree = database.trees[legocc.tid];
    DatabaseTreeNode &node = tree->nodes[legocc.tonodeid];

    for ( unsigned int j = 0; j < node.edges.size (); j++ ) {
      // never walk back along the edge this occurrence arrived through
      if ( node.edges[j].tonode == legocc.fromnodeid )
        continue;

      EdgeLabel edgelabel = node.edges[j].edgelabel;
      int number = nocycle ( tree, node, node.edges[j].tonode, i, &legoccurrencesdata );

      if ( number == 0 ) {
        if ( edgelabel >= minlabel && edgelabel != neglect ) {
          vector<LegOccurrence> &candidatelegsoccs = candidatelegsoccurrences[edgelabel].elements;
          if ( candidatelegsoccs.empty () )
            candidatelegsoccurrences[edgelabel].frequency++;
          else {
            // frequency counts changes of tid between consecutive entries
            if ( candidatelegsoccs.back ().tid != legocc.tid )
              candidatelegsoccurrences[edgelabel].frequency++;
            // the previous entry with this label stems from the same
            // occurrence i: count this tid once for the self-join
            if ( candidatelegsoccs.back ().occurrenceid == i && lastself[edgelabel] != legocc.tid ) {
              lastself[edgelabel] = legocc.tid;
              candidatelegsoccurrences[edgelabel].selfjoin++;
            }
          }
          candidatelegsoccs.push_back ( LegOccurrence ( legocc.tid, i, node.edges[j].tonode, legocc.tonodeid ) );
          setmax ( candidatelegsoccurrences[edgelabel].maxdegree,
                   database.trees[legocc.tid]->nodes[node.edges[j].tonode].edges.size () );
        }
      }
      else if ( number - 1 != graphstate.nodes.back().edges[0].tonode ) {
        candidateCloseLegsAllocate ( number, legoccurrencesdata.number + 1 );
        vector<CloseLegOccurrence> &candidatelegsoccs = candidatecloselegsoccs[number][edgelabel].elements;
        if ( !candidatelegsoccs.size () || candidatelegsoccs.back ().tid != legocc.tid )
          candidatecloselegsoccs[number][edgelabel].frequency++;
        candidatelegsoccs.push_back ( CloseLegOccurrence ( legocc.tid, i ) );
        // NOTE(review): this raises maxdegree of the ordinary candidate list
        // for edgelabel, not of the close-leg list, and does so regardless of
        // minlabel/neglect. Kept exactly as found; confirm this is intended.
        setmax ( candidatelegsoccurrences[edgelabel].maxdegree,
                 database.trees[legocc.tid]->nodes[node.edges[j].tonode].edges.size () );
      }
    }
  }
}
// Self-join of a leg occurrence list.
//
// Returns NULL when legoccsdata.selfjoin is below minfreq. Otherwise the shared
// `legoccurrences` object is rebuilt and its address returned: the input list
// is walked in runs of consecutive entries with the same occurrenceid, and for
// every ordered pair of distinct positions (first, second) inside a run an
// entry ( tid of the run, first, tonodeid/fromnodeid of the entry at `second` )
// is appended. maxdegree tracks, via setmax, the edge count of the nodes
// reached; selfjoin is incremented for a run of more than two entries whose
// tid differs from the tid counted last.
LegOccurrencesPtr join ( LegOccurrences &legoccsdata ) {
  if ( legoccsdata.selfjoin < minfreq )
    return NULL;

  legoccurrences.elements.resize ( 0 );
  vector<LegOccurrence> &source = legoccsdata.elements;
  legoccurrences.maxdegree = 0;
  legoccurrences.selfjoin = 0;

  Tid lastcounted = NOTID;
  OccurrenceId runend = 0;

  do {
    OccurrenceId runbegin = runend;
    LegOccurrence &head = source[runbegin];

    // move runend just past the last entry sharing head's occurrenceid
    for ( runend++;
          runend < source.size () && source[runend].occurrenceid == head.occurrenceid;
          runend++ )
      ;

    // pair every entry of the run with every other entry of the run
    for ( OccurrenceId first = runbegin; first < runend; first++ ) {
      for ( OccurrenceId second = runbegin; second < runend; second++ ) {
        if ( first == second )
          continue;
        legoccurrences.elements.push_back (
          LegOccurrence ( head.tid, first, source[second].tonodeid, source[second].fromnodeid ) );
        setmax ( legoccurrences.maxdegree,
                 database.trees[head.tid]->nodes[source[second].tonodeid].edges.size () );
      }
    }

    if ( ( runend - runbegin > 2 ) && head.tid != lastcounted ) {
      lastcounted = head.tid;
      legoccurrences.selfjoin++;
    }
  }
  while ( runend < source.size () );

  // No frequency test here: the self-join frequency was already computed
  // exactly while the input list was built, so it is simply taken over.
  legoccurrences.parent = &legoccsdata;
  legoccurrences.number = legoccsdata.number + 1;
  legoccurrences.frequency = legoccsdata.selfjoin;
  return &legoccurrences;
}
// This function is on the critical path. Its efficiency is MOST important. LegOccurrencesPtr join ( LegOccurrences &legoccsdata1, NodeId connectingnode, LegOccurrences &legoccsdata2 ) { if ( graphstate.getNodeDegree ( connectingnode ) == graphstate.getNodeMaxDegree ( connectingnode ) ) return NULL; Frequency frequency = 0; Tid lasttid = NOTID; vector<LegOccurrence> &legoccs1 = legoccsdata1.elements, &legoccs2 = legoccsdata2.elements; legoccurrences.elements.resize ( 0 ); legoccurrences.maxdegree = 0; legoccurrences.selfjoin = 0; //legoccurrences.elements.reserve ( legoccs1.size () * 2 ); // increased memory usage, and speed! OccurrenceId j = 0, k = 0, l, m; unsigned int legoccs1size = legoccs1.size (), legoccs2size = legoccs2.size (); // this increases speed CONSIDERABLY! Tid lastself = NOTID; do { while ( j < legoccs1size && legoccs1[j].occurrenceid < legoccs2[k].occurrenceid ) { j++; } if ( j < legoccs1size ) { LegOccurrence &jlegocc = legoccs1[j]; while ( k < legoccs2size && legoccs2[k].occurrenceid < jlegocc.occurrenceid ) { k++; } if ( k < legoccs2size ) { if ( legoccs2[k].occurrenceid == jlegocc.occurrenceid ) { m = j; do { j++; } while ( j < legoccs1size && legoccs1[j].occurrenceid == jlegocc.occurrenceid ); l = k; do { k++; } while ( k < legoccs2size && legoccs2[k].occurrenceid == jlegocc.occurrenceid ); bool add = false; for ( OccurrenceId m2 = m; m2 < j; m2++ ) { int d = 0; for ( OccurrenceId l2 = l; l2 < k; l2++ ) { NodeId tonodeid = legoccs2[l2].tonodeid; if ( legoccs1[m2].tonodeid != tonodeid ) { legoccurrences.elements.push_back ( LegOccurrence ( jlegocc.tid, m2, tonodeid, legoccs2[l2].fromnodeid ) ); setmax ( legoccurrences.maxdegree, database.trees[jlegocc.tid]->nodes[tonodeid].edges.size () ); add = true; d++; } } if ( d > 1 && jlegocc.tid != lastself ) { lastself = jlegocc.tid; legoccurrences.selfjoin++; } } if ( jlegocc.tid != lasttid && add ) { lasttid = jlegocc.tid; frequency++; } if ( k == legoccs2size ) break; } } else break; } else break; } 
while ( true ); if ( frequency >= minfreq ) { legoccurrences.parent = &legoccsdata1; legoccurrences.number = legoccsdata1.number + 1; legoccurrences.frequency = frequency; return &legoccurrences; } else return NULL; }
void Database::reorder () { // PHASE I: LABEL EDGES ACCORDING TO FREQUENCY // cerr << "REORDER" << endl; // gather frequent edgelabels and sort according to frequency edgelabelsindexes.reserve ( edgelabels.size () ); for (unsigned int i = 0; i < edgelabels.size (); i++ ) { if ( edgelabels[i].frequency >= fm::minfreq ) edgelabelsindexes.push_back ( i ); } //each(edgelabelsindexes) { // cerr << (int) edgelabelsindexes[i] << " (" // << nodelabels[edgelabels[edgelabelsindexes[i]].fromnodelabel].inputlabel // << " => " // << nodelabels[edgelabels[edgelabelsindexes[i]].tonodelabel].inputlabel // << ")" << endl; //} //cerr << endl; sort ( edgelabelsindexes.begin (), edgelabelsindexes.end (), EdgeLabelsIndexesSort ( edgelabels ) ); //each(edgelabelsindexes) { // cerr << (int) edgelabelsindexes[i] << " (" // << nodelabels[edgelabels[edgelabelsindexes[i]].fromnodelabel].inputlabel // << " => " // << nodelabels[edgelabels[edgelabelsindexes[i]].tonodelabel].inputlabel // << ")" << endl; //} //cerr << endl; // Now, use the ranked indices to re-number frequent edge labels with their rank for (unsigned int i = 0; i < edgelabelsindexes.size (); i++ ) { edgelabels[edgelabelsindexes[i]].edgelabel = i; // fill in the edge labels for the first time // #define DEBUG #ifdef DEBUG DatabaseEdgeLabel &label = edgelabels[edgelabelsindexes[i]]; cout << (int) nodelabels[label.tonodelabel].inputlabel << "[" << (int) label.inputedgelabel << "]" << (int) nodelabels[label.fromnodelabel].inputlabel <<"-->" << i <<endl; #endif // the edgelabel is the rank found by REORDER } // PHASE II: REMOVE INFREQUENT EDGES FROM NODES // cerr << "REMOVE" << endl; for ( Tid i = 0; i < trees.size (); i++ ) { DatabaseTree &tree = * (trees[i]); // for every tree i... for ( NodeId j = 0; j < tree.nodes.size (); j++ ) { // for every node j... // cerr << endl; DatabaseTreeNode &node = tree.nodes[j]; if ( nodelabels[node.nodelabel].frequency >= fm::minfreq ) { // ...check its frequency... 
DatabaseNodeLabel &nodelabel = nodelabels[node.nodelabel]; // cerr << "Leg Occurence for node " << nodelabel.inputlabel // << ": " << tree.tid << " " << nodelabel.occurrences.elements.size() << " " << j << " " << NONODE << endl; nodelabel.occurrences.elements.push_back ( LegOccurrence ( tree.tid, (OccurrenceId) nodelabel.occurrences.elements.size (), j, NONODE ) ); // ...and push occurence in database int k = 0; // cerr << "node " << (int) node.nodelabel << " (" << nodelabels[node.nodelabel].inputlabel << ")" << endl; for ( int l = 0; l < node.edges.size (); l++ ) { // For each edge l going out of j... EdgeLabel lab = node.edges[l].edgelabel; // ... (with label lab)... if ( edgelabels[lab].frequency >= fm::minfreq ) { // ... check its frequency... // DatabaseTreeEdge& edge = node.edges[l]; // cerr << " edge " << (int) edge.edgelabel << " moved from " << l << " to " << k << endl; node.edges[k].edgelabel = edgelabels[lab].edgelabel; // ... and overwrite old edges node.edges[k].tonode = node.edges[l].tonode; //node.edges[k].bond = node.edges[l].bond; k++; } } // cerr << "Truncating " << node.edges.size() - k << " edges" << endl; node.edges.resize ( k ); // truncate the rest } else node.edges.clear (); // truncate all edges } } }