double c_nodes_jacc(int_matrix & ten, int_matrix & en, int dim) {

	// Best-match score between two covers based on the Jaccard index:
	// for every cluster of en, find the ten-cluster with the largest Jaccard
	// overlap and accumulate that maximum.
	//   ten, en : covers, each a list of clusters (clusters are node-id lists)
	//   dim     : total number of nodes (node ids are 0..dim-1)
	// note that it should be weighted on the cluster size (I believe) -- TODO confirm

	// mems[node] = indices of the ten-clusters that contain this node
	deque<deque<int> > mems;
	deque<int> first;
	for(int i=0; i<dim; i++)
		mems.push_back(first);

	for (int ii=0; ii<int(ten.size()); ii++)
		for(int i=0; i<int(ten[ii].size()); i++)
			mems[ten[ii][i]].push_back(ii);

	double global_overlap=0;
	RANGE_loop(k, en) {

		deque<int> & c = en[k];
		map<int, int> com_ol;		// it maps the index of the ten into the overlap with en[k]
		RANGE_loop(i, c) {
			for(int j=0; j<int(mems[c[i]].size()); j++)
				int_histogram(mems[c[i]][j], com_ol);	// counter-map overload: ++com_ol[index]
		}

		// only ten-clusters that actually share nodes with en[k] can maximize Jaccard
		double max_jac=0;
		for(map<int, int>::iterator itm=com_ol.begin(); itm!=com_ol.end(); itm++) {

			set<int> s1;
			set<int> s2;
			deque_to_set(c, s1);
			deque_to_set(ten[itm->first], s2);
			double jc=jaccard(s1, s2);
			cout<<"jc: "<<jc<<endl;
			max_jac=max(max_jac, jc);
		}

		global_overlap+=max_jac;
		cout<<"========== "<<global_overlap<<endl;
	}

	// fix: the original text fell off the end of a non-void function (undefined
	// behaviour). Return the average best-match Jaccard over the clusters of en;
	// guard the empty cover so we never divide by zero.
	// NOTE(review): normalization by en.size() mirrors H_x_given_y3 below -- confirm
	// against the original tail if it becomes available.
	if(en.empty())
		return 0;

	return global_overlap/en.size();
}
void int_histogram(string infile, string outfile) {

	// Reads whitespace-separated integers from infile and writes their
	// histogram to outfile (delegates the binning to the
	// int_histogram(deque<int>&, ostream&) overload).
	// fix: the original allocated a non-standard variable-length char array
	// (char b[infile.size()+outfile.size()+1]) and copied both strings into it
	// via cast_string_to_char(); std::string::c_str() gives the same
	// NUL-terminated pointer with no copy and no VLA extension.

	ifstream ing(infile.c_str());

	deque<int> H;		// every integer found in the input file
	int h;
	while(ing>>h)
		H.push_back(h);

	ofstream outg(outfile.c_str());
	int_histogram(H, outg);
}
double H_x_given_y3(deque<deque<int> > &en, deque<deque<int> > &ten, int dim) {

	// Normalized conditional entropy H(X|Y) between two covers:
	// you know y and you want to find x according to a certain index labelling.
	// so, for each x (cluster of en) you look for the best y (cluster of ten),
	// i.e. the one minimizing H(x|y).
	//   en, ten : covers (lists of clusters, each cluster a list of node ids)
	//   dim     : total number of nodes
	// Returns the average over en's clusters of H(x|best y)/H(x), so 0 means
	// en is perfectly predicted by ten.
	// NOTE(review): relies on H(deque<double>&) (entropy of a probability list)
	// and on the int_histogram(int, map<int,int>&) counter overload -- both
	// defined elsewhere in this project.
	// WARNING(review): the identifiers below mix digit '0' and letter 'O'
	// (I1_02, O1_I2, O1_02) -- edit with care.

	// mems[node] = indices of the ten-clusters containing that node
	deque<deque<int> > mems;
	deque<int> first;
	for(int i=0; i<dim; i++)
		mems.push_back(first);

	for (int ii=0; ii<int(ten.size()); ii++)
		for(int i=0; i<int(ten[ii].size()); i++)
			mems[ten[ii][i]].push_back(ii);

	double H_x_y=0;

	for (int k=0; k<int(en.size()); k++) {

		deque<int> & c = en[k];

		// H(x): entropy of the binary membership variable of cluster c
		// (a node is inside with prob I2/dim, outside with prob O2/dim)
		deque <double> p;
		double I2=double(c.size());
		double O2=(dim-I2);
		p.push_back(I2/dim);
		p.push_back(O2/dim);
		double H2_=H(p);
		p.clear();

		// diff will become min over y of H(x|y); H(x) is the worst case
		double diff=H2_;

		// I need to know all the groups which share nodes with en[k]
		map<int, int> com_ol;		// it maps the index of the ten into the overlap with en[k]
		for(int i=0; i<int(c.size()); i++) {
			for(int j=0; j<int(mems[c[i]].size()); j++)
				int_histogram(mems[c[i]][j], com_ol);
		}

		for(map<int, int>::iterator itm=com_ol.begin(); itm!=com_ol.end(); itm++) {

			// H(y): entropy of the candidate ten-cluster's membership variable
			double I1=double(ten[itm->first].size());
			double O1=(dim-I1);
			p.push_back(I1/dim);
			p.push_back(O1/dim);
			double H1_=H(p);
			p.clear();

			// joint counts of the four membership combinations
			// (in/in, in-y-only, in-x-only, out/out)
			double I1_I2= itm->second;
			double I1_02= ten[itm->first].size() - I1_I2;
			double O1_I2= c.size() - I1_I2;
			double O1_02= dim - I1_I2 - I1_02 - O1_I2;

			// split the joint entropy into the "agreement" terms ...
			p.push_back(I1_I2/dim);
			p.push_back(O1_02/dim);
			double H12_positive=H(p);
			p.clear();

			// ... and the "disagreement" terms
			p.push_back(I1_02/dim);
			p.push_back(O1_I2/dim);
			double H12_negative=H(p);
			double H12_=H12_negative+H12_positive;
			p.clear();

			// correction: if disagreement dominates, treat x and y as
			// independent (H(x,y)=H(x)+H(y)) to avoid spurious matches
			if (H12_negative>H12_positive) {
				H12_=H1_+H2_;
			}

			// H(x|y) = H(x,y) - H(y); keep the best (smallest) value
			if ((H12_-H1_)<diff) {
				diff=(H12_-H1_);
			}
		}

		// normalize by H(x); a zero-entropy cluster contributes the maximum 1
		if (H2_==0)
			H_x_y+=1;
		else
			H_x_y+=(diff/H2_);
	}

	return (H_x_y/(en.size()));
}
int print_network(deque<set<int> > & E, const deque<deque<int> > & member_list, const deque<deque<int> > & member_matrix, deque<int> & num_seq) { int edges=0; int num_nodes=member_list.size(); deque<double> double_mixing; for (int i=0; i<E.size(); i++) { double one_minus_mu = double(internal_kin(E, member_list, i))/E[i].size(); double_mixing.push_back(1.- one_minus_mu); edges+=E[i].size(); } //cout<<"\n----------------------------------------------------------"<<endl; //cout<<endl; double density=0; double sparsity=0; for (int i=0; i<member_matrix.size(); i++) { double media_int=0; double media_est=0; for (int j=0; j<member_matrix[i].size(); j++) { double kinj = double(internal_kin_only_one(E[member_matrix[i][j]], member_matrix[i])); media_int+= kinj; media_est+=E[member_matrix[i][j]].size() - double(internal_kin_only_one(E[member_matrix[i][j]], member_matrix[i])); } double pair_num=(member_matrix[i].size()*(member_matrix[i].size()-1)); double pair_num_e=((num_nodes-member_matrix[i].size())*(member_matrix[i].size())); if(pair_num!=0) density+=media_int/pair_num; if(pair_num_e!=0) sparsity+=media_est/pair_num_e; } density=density/member_matrix.size(); sparsity=sparsity/member_matrix.size(); ofstream out1("network.dat"); for (int u=0; u<E.size(); u++) { set<int>::iterator itb=E[u].begin(); while (itb!=E[u].end()) out1<<u+1<<"\t"<<*(itb++)+1<<endl; } out1.close(); ofstream out2("community.dat"); for (int i=0; i<member_list.size(); i++) { out2<<i+1<<"\t"; for (int j=0; j<member_list[i].size(); j++) out2<<member_list[i][j]+1<<" "; out2<<endl; } out2.close(); cout<<"\n\n---------------------------------------------------------------------------"<<endl; cout<<"network of "<<num_nodes<<" vertices and "<<edges/2<<" edges"<<";\t average degree = "<<double(edges)/num_nodes<<endl; cout<<"\naverage mixing parameter: "<<average_func(double_mixing)<<" +/- "<<sqrt(variance_func(double_mixing))<<endl; cout<<"p_in: "<<density<<"\tp_out: "<<sparsity<<endl; ofstream 
statout("statistics.dat"); deque<int> degree_seq; for (int i=0; i<E.size(); i++) degree_seq.push_back(E[i].size()); statout<<"degree distribution (probability density function of the degree in logarithmic bins) "<<endl; log_histogram(degree_seq, statout, 10); statout<<"\ndegree distribution (degree-occurrences) "<<endl; int_histogram(degree_seq, statout); statout<<endl<<"--------------------------------------"<<endl; statout<<"community distribution (size-occurrences)"<<endl; int_histogram(num_seq, statout); statout<<endl<<"--------------------------------------"<<endl; statout<<"mixing parameter"<<endl; not_norm_histogram(double_mixing, statout, 20, 0, 0); statout<<endl<<"--------------------------------------"<<endl; statout.close(); cout<<endl<<endl; return 0; }
int print_network(deque<set<int> > & E, const deque<deque<int> > & member_list, const deque<deque<int> > & member_matrix, deque<int> & num_seq, deque<map <int, double > > & neigh_weigh, double beta, double mu, double mu0) { int edges=0; int num_nodes=member_list.size(); deque<double> double_mixing; for (int i=0; i<E.size(); i++) { double one_minus_mu = double(internal_kin(E, member_list, i))/E[i].size(); double_mixing.push_back(1.- one_minus_mu); edges+=E[i].size(); } //cout<<"\n----------------------------------------------------------"<<endl; //cout<<endl; double density=0; double sparsity=0; for (int i=0; i<member_matrix.size(); i++) { double media_int=0; double media_est=0; for (int j=0; j<member_matrix[i].size(); j++) { double kinj = double(internal_kin_only_one(E[member_matrix[i][j]], member_matrix[i])); media_int+= kinj; media_est+=E[member_matrix[i][j]].size() - double(internal_kin_only_one(E[member_matrix[i][j]], member_matrix[i])); } double pair_num=(member_matrix[i].size()*(member_matrix[i].size()-1)); double pair_num_e=((num_nodes-member_matrix[i].size())*(member_matrix[i].size())); if(pair_num!=0) density+=media_int/pair_num; if(pair_num_e!=0) sparsity+=media_est/pair_num_e; } density=density/member_matrix.size(); sparsity=sparsity/member_matrix.size(); ofstream out1("network.dat"); for (int u=0; u<E.size(); u++) { set<int>::iterator itb=E[u].begin(); while (itb!=E[u].end()) out1<<u+1<<"\t"<<*(itb++)+1<<"\t"<<neigh_weigh[u][*(itb)]<<endl; } ofstream out2("community.dat"); for (int i=0; i<member_list.size(); i++) { out2<<i+1<<"\t"; for (int j=0; j<member_list[i].size(); j++) out2<<member_list[i][j]+1<<" "; out2<<endl; } cout<<"\n\n---------------------------------------------------------------------------"<<endl; cout<<"network of "<<num_nodes<<" vertices and "<<edges/2<<" edges"<<";\t average degree = "<<double(edges)/num_nodes<<endl; cout<<"\naverage mixing parameter (topology): "<< average_func(double_mixing)<<" +/- 
"<<sqrt(variance_func(double_mixing))<<endl; cout<<"p_in: "<<density<<"\tp_out: "<<sparsity<<endl; ofstream statout("statistics.dat"); deque<int> degree_seq; for (int i=0; i<E.size(); i++) degree_seq.push_back(E[i].size()); statout<<"degree distribution (probability density function of the degree in logarithmic bins) "<<endl; log_histogram(degree_seq, statout, 10); statout<<"\ndegree distribution (degree-occurrences) "<<endl; int_histogram(degree_seq, statout); statout<<endl<<"--------------------------------------"<<endl; statout<<"community distribution (size-occurrences)"<<endl; int_histogram(num_seq, statout); statout<<endl<<"--------------------------------------"<<endl; statout<<"mixing parameter (topology)"<<endl; not_norm_histogram(double_mixing, statout, 20, 0, 0); statout<<endl<<"--------------------------------------"<<endl; //* deque<double> inwij; deque<double> outwij; //deque<double> inkij; //deque<double> outkij; double csi=(1. - mu) / (1. - mu0); double csi2=mu /mu0; double tstrength=0; deque<double> one_minus_mu2; for(int i=0; i<neigh_weigh.size(); i++) { double internal_strength_i=0; double strength_i=0; for(map<int, double>::iterator itm = neigh_weigh[i].begin(); itm!=neigh_weigh[i].end(); itm++) { if(they_are_mate(i, itm->first, member_list)) { inwij.push_back(itm->second); //inkij.push_back(csi * pow(E[i].size(), beta-1)); internal_strength_i+=itm->second; } else { outwij.push_back(itm->second); //outkij.push_back(csi2 * pow(E[i].size(), beta-1)); } tstrength+=itm->second; strength_i+=itm->second; } one_minus_mu2.push_back(1 - internal_strength_i/strength_i); } //cout<<"average strength "<<tstrength / E.size()<<"\taverage internal strenght: "<<average_internal_strenght<<endl; cout<<"\naverage mixing parameter (weights): "<<average_func(one_minus_mu2)<<" +/- "<<sqrt(variance_func(one_minus_mu2))<<endl; statout<<"mixing parameter (weights)"<<endl; not_norm_histogram(one_minus_mu2, statout, 20, 0, 0); 
statout<<endl<<"--------------------------------------"<<endl; //cout<<" expected internal "<<tstrength * (1 - mu) / E.size()<<endl; //cout<<"internal links: "<<inwij.size()<<" external: "<<outwij.size()<<endl; /* ofstream hout1("inwij.dat"); not_norm_histogram(inwij, hout1, 20, 0, 0); ofstream hout2("outwij.dat"); not_norm_histogram(outwij, hout2, 20, 0, 0); ofstream hout3("corrin.dat"); not_norm_histogram_correlated(inkij, inwij, hout3, 20, 0, 0); ofstream hout4("corrout.dat"); not_norm_histogram_correlated(outkij, outwij, hout4, 20, 0, 0); //*/ //*/ cout<<"average weight of an internal link "<<average_func(inwij)<<" +/- "<<sqrt(variance_func(inwij))<<endl; cout<<"average weight of an external link "<<average_func(outwij)<<" +/- "<<sqrt(variance_func(outwij))<<endl; //cout<<"average weight of an internal link expected "<<tstrength / edges * (1. - mu) / (1. - mu0)<<endl; //cout<<"average weight of an external link expected "<<tstrength / edges * (mu) / (mu0)<<endl; statout<<"internal weights (weight-occurrences)"<<endl; not_norm_histogram(inwij, statout, 20, 0, 0); statout<<endl<<"--------------------------------------"<<endl; statout<<"external weights (weight-occurrences)"<<endl; not_norm_histogram(outwij, statout, 20, 0, 0); cout<<endl<<endl; return 0; }