void matcher::gen_ans_pairs_oldway() { fprintf(stderr, "gen ans pairs old way\n"); ans_pairs.clear(); vector<match_edge> match_edges; char * flag_a = new char[MAX_NODES], * flag = new char[MAX_NODES]; memset(flag_a, 0, MAX_NODES); memset(flag, 0, MAX_NODES); for (int i=1; i <= G_a->num_nodes; i++) for (int j=1; j <= G->num_nodes; j++) match_edges.push_back(match_edge(i, j, sim_nodes[i][j])); sort(match_edges.begin(), match_edges.end()); for (vector <match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) { if (!flag_a[it->u] && !flag[it->v]) { flag_a[it->u] = 1; flag[it->v] = 1; ans_pairs.push_back(*it); } } delete []flag_a; delete []flag; }
void analyst::analysis(class matcher &M) { map <int, int> correct_match; int * deg_a = new int[M.G_a->num_nodes+1]; int * deg = new int[M.G->num_nodes+1]; FILE * in = fopen("./data/100\%/pair_a_c.txt", "r"); for (int i, j; fscanf(in, "%d%d", &i, &j) == 2; correct_match[j]=i); fclose(in); calc_degree(*(M.G_a), deg_a); calc_degree(*(M.G), deg); vector<match_edge> match_edges; char * flag_a = new char[MAX_NODES], * flag = new char[MAX_NODES]; memset(flag_a, 0, MAX_NODES); memset(flag, 0, MAX_NODES); for (int i=1; i <= M.G_a->num_nodes; i++) for (int j=1; j <= M.G->num_nodes; j++) match_edges.push_back(match_edge(i, j, M.sim_nodes[i][j])); sort(match_edges.begin(), match_edges.end()); vector <double> seq; FILE * ana = fopen("analysis.txt", "w"); for (int i=0; i < M.ans_pairs.size(); i++) { if (M.ans_pairs[i].u != correct_match[M.ans_pairs[i].v]) fprintf(ana, "@@@\t"); fprintf(ana, "Node \t%d (a: \t%d) \tDegree \t%d (\t%d) match with a_node \t%d (deg \t%d): \t%g", M.ans_pairs[i].v, correct_match[M.ans_pairs[i].v], deg[M.ans_pairs[i].v], deg_a[correct_match[M.ans_pairs[i].v]], M.ans_pairs[i].u, deg_a[M.ans_pairs[i].u], M.ans_pairs[i].w ); seq.clear(); for (int j=1; j<=M.G_a->num_nodes; j++) if (!flag_a[j]) seq.push_back(M.sim_nodes[j][M.ans_pairs[i].v]); flag_a[M.ans_pairs[i].u] = 1; flag[M.ans_pairs[i].v] = 1; sort(seq.begin(), seq.end()); int k = 0; for (; k<seq.size() && seq[k] <= M.ans_pairs[i].w + 1e-6; k++); fprintf(ana, " (No. \t%d, score \t%g/%g)", (int)seq.size()-k+1, seq[k-1], seq[seq.size()-1]); fprintf(ana, "\n"); } fclose(ana); delete []deg_a; delete []deg; delete []flag_a; delete []flag; }
double matcher::calc_sim_nodes(int u, int v, int level) { #ifdef USE_ONLY_NEIGHBORS level = 1; #endif graph::subgraph * subg_a = G_a->extract_subgraph(u); graph::subgraph * subg = G->extract_subgraph(v); double w = 0; #if AVERAGE_EACH_CALC int sum = 0; #endif vector <match_edge> match_edges; set <int> flag_a; set <int> flag; for (int t=0; t<2; t++) { flag_a.clear(); flag.clear(); match_edges.clear(); for (int i=0; i < level; i++) { #if AVERAGE_EACH_CALC sum += min(subg_a[t].nodes_per_level[i].size(), subg[t].nodes_per_level[i].size()); #endif for (vector<int> :: iterator j = subg_a[t].nodes_per_level[i].begin(); j!=subg_a[t].nodes_per_level[i].end(); j++) for (vector<int> :: iterator k = subg[t].nodes_per_level[i].begin(); k!=subg[t].nodes_per_level[i].end(); k++) match_edges.push_back(match_edge(*j, *k, last_round[*j][*k])); } sort(match_edges.begin(), match_edges.end()); for (vector<match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) { if (flag_a.find(it->u) == flag_a.end() && flag.find(it->v) == flag.end()) { flag_a.insert(it->u); flag.insert(it->v); w += it->w; } } #if AVERAGE_EACH_CALC w /= sum; #endif } return sim_nodes[u][v] = w ; }
double matcher::calc_sim_nodes(int u, int v, int level) { #ifdef USE_ONLY_NEIGHBORS level = 1; #endif graph::subgraph * subg_a = G_a->extract_subgraph(u); graph::subgraph * subg = G->extract_subgraph(v); double w = 0; vector <match_edge> match_edges; set <int> flag_a; set <int> flag; for (int t=0; t<2; t++) { flag_a.clear(); flag.clear(); match_edges.clear(); for (int i=0; i < level; i++) { for (vector<int> :: iterator j = subg_a[t].nodes_per_level[i].begin(); j!=subg_a[t].nodes_per_level[i].end(); j++) for (vector<int> :: iterator k = subg[t].nodes_per_level[i].begin(); k!=subg[t].nodes_per_level[i].end(); k++) match_edges.push_back(match_edge(*j, *k, last_round[*j][*k])); } sort(match_edges.begin(), match_edges.end()); for (vector<match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) { if (flag_a.find(it->u) == flag_a.end() && flag.find(it->v) == flag.end()) { flag_a.insert(it->u); flag.insert(it->v); w += it->w; } } } #ifndef BASELINE // RoleSim int weight = (int) max(G_a->edges[u]->size(), G->edges[v]->size()) + (int) max(G_a->rev_edges[u]->size(), G->rev_edges[v]->size()); if (weight > 0) w /= weight; return sim_nodes[u][v] = w * (1 - BETA) + BETA; #else return sim_nodes[u][v] = w; #endif }
/*
 * Builds ans_pairs (a one-to-one matching between nodes of G_a and G) in
 * several stages:
 *   1. seed: greedy matching over all pairs sorted by sim_nodes, limited to
 *      roughly G_a->num_nodes * PERC_THRSD pairs;
 *   2. neighbourhood weights for unmatched nodes adjacent to seed pairs;
 *   3. heap-driven expansion: repeatedly take the heap's top pair and bump
 *      the weights of its neighbours;
 *   4. greedy pass over the remaining sorted similarity edges;
 *   5. arbitrary pairing (weight 0) of whatever is still unmatched.
 * Progress and timing are logged to stderr. Node ids are 1-based.
 *
 * NOTE(review): the ordering produced by sort() depends on match_edge's
 * operator<, which is defined elsewhere -- presumably best-first.
 */
void matcher::gen_ans_pairs() {
    clock_t time_start = clock();
    ans_pairs.clear();
    vector<match_edge> match_edges;
    // flag_a / flag: node is part of an accepted pair (G_a side / G side).
    char * flag_a = new char[MAX_NODES], * flag = new char[MAX_NODES];
    // fake_flag_*: node was an endpoint of an edge considered in stage 1,
    // whether or not that edge was accepted.
    char * fake_flag_a = new char[MAX_NODES], * fake_flag = new char[MAX_NODES];
    memset(flag_a, 0, MAX_NODES);
    memset(flag, 0, MAX_NODES);
    memset(fake_flag_a, 0, MAX_NODES);
    memset(fake_flag, 0, MAX_NODES);
    // match[u] = G node paired with G_a node u; only valid where flag_a[u] is set.
    int * match = new int[MAX_NODES];

    // One candidate edge per (G_a node, G node) pair.
    for (int i=1; i <= G_a->num_nodes; i++)
        for (int j=1; j <= G->num_nodes; j++)
            match_edges.push_back(match_edge(i, j, sim_nodes[i][j]));
    sort(match_edges.begin(), match_edges.end());

    // Stage 1: seed matching, capped by the PERC_THRSD fraction of G_a.
    for (vector <match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++)
        if (!fake_flag_a[it->u] && !fake_flag[it->v]) {
            fake_flag_a[it->u] = 1;
            fake_flag[it->v] = 1;
            if (ans_pairs.size() < G_a->num_nodes * PERC_THRSD && !flag_a[it->u] && !flag[it->v]) {
                flag_a[it->u] = 1;
                flag[it->v] = 1;
                match[it->u] = it->v;
                ans_pairs.push_back(*it);
            }
            // Only leaves early when the count strictly exceeds the threshold;
            // if it lands exactly on it, the rest of the list is still scanned
            // (without accepting anything).
            if (ans_pairs.size() > G_a->num_nodes * PERC_THRSD) break;
        }
    fprintf(stderr, "First part: %lu pairs.\n", ans_pairs.size());

    // TINY = smallest sim_nodes value over all pairs (capped at 1e20).
    // NOTE(review): since TINY is a minimum over all in-range pairs,
    // max(sim, TINY) below equals sim for those pairs; other variants of this
    // routine restrict the minimum to positive values -- confirm intent.
    double TINY = 1e20;
    for (size_t i = 1; i <= G_a->num_nodes; i++) {
        for (int j = 1; j <= G->num_nodes; j++)
            if (TINY > sim_nodes[i][j]) TINY = sim_nodes[i][j];
    }

    // Stage 2: for every unmatched G_a node i, accumulate a weight for each
    // unmatched G node k that stands in the same relation to match[j] as i
    // does to an already matched neighbour j.
    for (int i=1; i<=G_a->num_nodes; i++) {
        if (!flag_a[i]) {
            map <int, double> weight;
            // i -> j in G_a with j matched: candidates are k with k -> match[j] in G.
            for (vector <int> :: iterator j = G_a->edges[i]->begin(); j != G_a->edges[i]->end(); j++)
                if (flag_a[*j]) {
                    for (vector <int> :: iterator k = G->rev_edges[match[*j]]->begin(); k != G->rev_edges[match[*j]]->end(); k++)
                        if (!flag[*k]) weight[*k] += max(sim_nodes[i][*k], TINY);
                }
            // j -> i in G_a with j matched: candidates are k with match[j] -> k in G.
            for (vector <int> :: iterator j = G_a->rev_edges[i]->begin(); j != G_a->rev_edges[i]->end(); j++)
                if (flag_a[*j]) {
                    for (vector <int> :: iterator k = G->edges[match[*j]]->begin(); k != G->edges[match[*j]]->end(); k++)
                        if (!flag[*k]) weight[*k] += max(sim_nodes[i][*k], TINY);
                }
            // Only entries touched above are written; other weights[i][*]
            // slots keep whatever they held before this call.
            for (map <int, double> :: iterator k = weight.begin(); k!=weight.end(); k++)
                weights[i][k->first] = k->second;
        }
    }
    fprintf(stderr, "init weights.\n");

    // Stage 3: heap-driven expansion. The heap is constructed from the
    // weights table; its internals (nodes[], heap_pos, len) are defined
    // elsewhere -- nodes[1] is read here as the current top entry.
    int iterno = 0;
    H = new matcher::heap(weights, G_a->num_nodes, this);
    for (int last_s = 0, u, v; H->len && ans_pairs.size() != last_s; iterno++) {
        last_s = ans_pairs.size();
        // Pop until a pair with two free endpoints is found (or the heap empties).
        while (H->len){
            u = H->nodes[1].u, v = H->nodes[1].v;
            H->pop();
            if (!flag_a[u] && !flag[v]) break;
        }
        // Heap ran dry without yielding a usable pair.
        if (flag_a[u] || flag[v]) break;
        flag_a[u] = flag[v] = 1;
        match[u] = v;
        ans_pairs.push_back(match_edge(u, v, weights[u][v]));
        // Reward pairs (j, k) of free successors of u and v; the weight is
        // updated first, then the heap entry is repositioned or created.
        // DECAY is a macro defined elsewhere, evaluated with the iteration number.
        for (vector <int> :: iterator j = G_a->edges[u]->begin(); j != G_a->edges[u]->end(); j++)
            if (!flag_a[*j]) {
                for (vector <int> :: iterator k = G->edges[v]->begin(); k != G->edges[v]->end(); k++)
                    if (!flag[*k]){
                        weights[*j][*k] += max(sim_nodes[*j][*k], TINY) * DECAY(iterno);
                        if (H->heap_pos[*j][*k]) H->heap_up(H->heap_pos[*j][*k]);
                        else H->push(*j, *k);
                    }
            }
        // Same for free predecessors of u and v.
        for (vector <int> :: iterator j = G_a->rev_edges[u]->begin(); j != G_a->rev_edges[u]->end(); j++)
            if (!flag_a[*j]) {
                for (vector <int> :: iterator k = G->rev_edges[v]->begin(); k != G->rev_edges[v]->end(); k++)
                    if (!flag[*k]){
                        weights[*j][*k] += max(sim_nodes[*j][*k], TINY) * DECAY(iterno);
                        if (H->heap_pos[*j][*k]) H->heap_up(H->heap_pos[*j][*k]);
                        else H->push(*j, *k);
                    }
            }
    }
    fprintf(stderr, "%lu pairs matched.\n", ans_pairs.size());

    // Stage 4: greedy pass over the similarity-sorted edge list for nodes the
    // expansion did not reach.
    for (std::vector<match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++)
        if (!flag_a[it->u] && !flag[it->v]){
            flag_a[it->u] = flag[it->v] = 1;
            match[it->u] = it->v;
            ans_pairs.push_back(*it);
        }
    fprintf(stderr, "%lu pairs processed.\n", ans_pairs.size());

    // Stage 5: pair each remaining G_a node with the first free G node, weight 0.
    for (int i=1; i<=G_a->num_nodes; i++)
        if (!flag_a[i]) {
            for (int j=1; j<=G->num_nodes; j++)
                if (!flag[j]) {
                    flag_a[i] = flag[j] = 1;
                    match[i] = j;
                    ans_pairs.push_back(match_edge(i, j, 0));
                    break;
                }
        }

    // NOTE(review): H is a member and is left pointing at freed memory here.
    delete H;
    delete []match;
    delete []flag_a;
    delete []flag;
    delete []fake_flag_a;
    delete []fake_flag;
    fprintf(stderr, "answer pairs generated.\n\t%.2lf seconds.\n", (clock()-time_start)*1.0/CLOCKS_PER_SEC);
}
void matcher::gen_ans_pairs() { clock_t time_start = clock(); ans_pairs.clear(); vector<match_edge> match_edges; char * flag_a = new char[MAX_NODES], * flag = new char[MAX_NODES]; char * fake_flag_a = new char[MAX_NODES], * fake_flag = new char[MAX_NODES]; memset(flag_a, 0, MAX_NODES); memset(flag, 0, MAX_NODES); memset(fake_flag_a, 0, MAX_NODES); memset(fake_flag, 0, MAX_NODES); int * match = new int[MAX_NODES]; for (int i=1; i <= G_a->num_nodes; i++) for (int j=1; j <= G->num_nodes; j++) match_edges.push_back(match_edge(i, j, sim_nodes[i][j])); sort(match_edges.begin(), match_edges.end()); for (vector <match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) if (!fake_flag_a[it->u] && !fake_flag[it->v]) { fake_flag_a[it->u] = 1; fake_flag[it->v] = 1; if (ans_pairs.size() < G_a->num_nodes * PERC_THRSD && !flag_a[it->u] && !flag[it->v]) { flag_a[it->u] = 1; flag[it->v] = 1; match[it->u] = it->v; ans_pairs.push_back(*it); } else if (ans_pairs.size() > G_a->num_nodes * PERC_THRSD) break; } fprintf(stderr, "First part: %lu pairs.\n", ans_pairs.size()); int iterno = 0; double TINY = 1e20; for (int i=1; i<=G_a->num_nodes; i++) for (int j=1; j<=G->num_nodes; j++) if (sim_nodes[i][j] > 0 && TINY > sim_nodes[i][j]) TINY = sim_nodes[i][j]; fprintf(stderr, "TINY = %g\n", TINY); iter: // fprintf(stderr, "iter %d @ %lu\n", iterno, ans_pairs.size()); match_edges.clear(); for (int i=1; i<=G_a->num_nodes; i++) { if (!flag_a[i]) { map <int, double> weight; for (vector <int> :: iterator j = G_a->edges[i]->begin(); j != G_a->edges[i]->end(); j++) if (flag_a[*j]) { for (vector <int> :: iterator k = G->rev_edges[match[*j]]->begin(); k != G->rev_edges[match[*j]]->end(); k++) if (!flag[*k]) weight[*k] += max(sim_nodes[i][*k], TINY); } for (vector <int> :: iterator j = G_a->rev_edges[i]->begin(); j != G_a->rev_edges[i]->end(); j++) if (flag_a[*j]) { for (vector <int> :: iterator k = G->edges[match[*j]]->begin(); k != G->edges[match[*j]]->end(); k++) if (!flag[*k]) 
weight[*k] += max(sim_nodes[i][*k], TINY); } for (map <int, double> :: iterator k = weight.begin(); k!=weight.end(); k++) match_edges.push_back(match_edge(i, k->first, k->second)); } } sort(match_edges.begin(), match_edges.end()); for (vector <match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) { if (it - match_edges.begin() >= NUM_PER_ITER){ iterno++; goto iter; } if ( !flag_a[it->u] && !flag[it->v]) { flag_a[it->u] = 1; flag[it->v] = 1; match[it->u] = it->v; ans_pairs.push_back(*it); } } fprintf(stderr, "%lu pairs matched.\n", ans_pairs.size()); for (vector<match_edge> :: iterator it=match_edges.begin(); it!=match_edges.end(); it++) if (!flag_a[it->u] && !flag[it->v]){ flag_a[it->u] = flag[it->v] = 1; match[it->u] = it->v; ans_pairs.push_back(*it); } fprintf(stderr, "%lu pairs processed.\n", ans_pairs.size()); for (int i=1; i<=G_a->num_nodes; i++) if (!flag_a[i]) { for (int j=1; j<=G->num_nodes; j++) if (!flag[j]) { flag_a[i] = flag[j] = 1; match[i] = j; ans_pairs.push_back(match_edge(i, j, 0)); break; } } delete []match; delete []flag_a; delete []flag; delete []fake_flag_a; delete []fake_flag; fprintf(stderr, "answer pairs generated.\n\t%.2lf seconds.\n", (clock()-time_start)*1.0/CLOCKS_PER_SEC); }
void matcher::gen_ans_pairs() { clock_t time_start = clock(); ans_pairs.clear(); char * flag_a = new char[MAX_NODES], * flag = new char[MAX_NODES]; char * fake_flag_a = new char[MAX_NODES], * fake_flag = new char[MAX_NODES]; memset(fake_flag_a, 0, MAX_NODES); memset(fake_flag, 0, MAX_NODES); memset(flag_a, 0, MAX_NODES); memset(flag, 0, MAX_NODES); int * match = new int[MAX_NODES]; double TINY = 1e20; // find TINY for (int i=1; i<=G_a->num_nodes; i++) for (int j=1; j<=G->num_nodes; j++) if (sim_nodes[i][j] > 0 && TINY > sim_nodes[i][j]) TINY = sim_nodes[i][j]; fprintf(stderr, "TINY = %g\n", TINY); // initialization for (int i=1; i<=G_a->num_nodes; i++) for (int j=1; j<=G->num_nodes; j++) { weights[i][j] = sim_nodes[i][j]; tops[i].push(make_pair(sim_nodes[i][j], j)); } // matching process for (int idx, matched=0; matched < G_a->num_nodes; ) { idx=-1; for (int i=1; i<=G_a->num_nodes; i++) { while (!tops[i].empty() && flag[tops[i].top().second]) tops[i].pop(); if (!flag_a[i] && !tops[i].empty()) { if (idx == -1) idx = i; else if (!tops[i].empty() && tops[i].top().first > tops[idx].top().first) idx = i; } } if (idx < 0) break; int u = idx, v = tops[idx].top().second; tops[idx].pop(); if (flag[v] || flag_a[u]) continue; matched++; flag_a[u] = 1; flag[v] = 1; match[u] = v; ans_pairs.push_back(match_edge(u, v, weights[u][v])); for (vector <int> ::iterator i = G_a->edges[u]->begin(); i != G_a->edges[u]->end(); i++) if (!flag_a[*i]) for (vector <int> ::iterator j = G->edges[v]->begin(); j != G->edges[v]->end(); j++) if (!flag[*j]) { weights[*i][*j] += sim_nodes[*i][*j];//max(sim_nodes[*i][*j], TINY); tops[*i].push(make_pair(weights[*i][*j], *j)); } for (vector <int> ::iterator i = G_a->rev_edges[u]->begin(); i != G_a->rev_edges[u]->end(); i++) if (!flag_a[*i]) for (vector <int> ::iterator j = G->rev_edges[v]->begin(); j != G->rev_edges[v]->end(); j++) if (!flag[*j]) { weights[*i][*j] += sim_nodes[*i][*j];//max(sim_nodes[*i][*j], TINY); 
tops[*i].push(make_pair(weights[*i][*j], *j)); } } fprintf(stderr, "%lu pairs matched.\n", ans_pairs.size()); delete []match; delete []flag_a; delete []flag; delete []fake_flag_a; delete []fake_flag; fprintf(stderr, "answer pairs generated.\n\t%.2lf seconds.\n", (clock()-time_start)*1.0/CLOCKS_PER_SEC); }