// Best-match comparison of two groupings of nodes, `ten` and `en`, based on
// the Jaccard index: for every group in `en` the largest Jaccard value against
// any group of `ten` that shares at least one node with it is added to
// `global_overlap`.
//
//   ten : groups of node ids; every id is used directly as an index into a
//         table of `dim` entries, with no bounds check
//   en  : groups of node ids, looked up in the same table
//   dim : number of entries in the node -> groups table
//         (assumes all node ids lie in [0, dim) -- TODO confirm with callers)
//
// NOTE(review): the end of this function (closing brace and return statement)
// is not visible in this excerpt, so the returned value is not documented here.
// NOTE(review): progress values are printed to cout on every iteration.
double c_nodes_jacc(int_matrix & ten, int_matrix & en, int dim) {
	
	
	// this function does a best match based on the jaccard index.
	// note that it should be weighted on the cluster size (I believe)
	
	
	// mems[node] lists the indices of the groups of ten that contain node
	deque<deque<int> > mems;
	
	// start with dim empty membership lists
	deque<int> first;
	for(int i=0; i<dim; i++)
		mems.push_back(first);
	
	// record, for every node listed in ten[ii], that it belongs to group ii
	for (int ii=0; ii<int(ten.size()); ii++)
		for(int i=0; i<int(ten[ii].size()); i++)
			mems[ten[ii][i]].push_back(ii);
	
	double global_overlap=0;
	// presumably RANGE_loop(k, en) iterates k over the indices of en -- macro defined elsewhere
	RANGE_loop(k, en) {
		
		deque<int> & c = en[k];
		
		map<int, int> com_ol;		// it maps the index of the ten into the overlap with en[k]
		
		// collect every group of ten that contains at least one node of c
		// (int_histogram is defined elsewhere; presumably it increments the count stored for the key)
		RANGE_loop(i, c) {
			
			for(int j=0; j<int(mems[c[i]].size()); j++)
				int_histogram(mems[c[i]][j], com_ol);
		}
		
		// only the keys of com_ol are used below; the stored counts are not read here
		double max_jac=0;
		for(map<int, int>::iterator itm=com_ol.begin(); itm!=com_ol.end(); itm++) {
			
			set<int> s1;
			set<int> s2;
			
			// NOTE(review): s1 is rebuilt from c on every iteration although c does not change
			deque_to_set(c, s1);
			deque_to_set(ten[itm->first], s2);
			
			double jc=jaccard(s1, s2);
			cout<<"jc: "<<jc<<endl;
			max_jac=max(max_jac, jc);
			
		}
		
		// add the best match found for en[k] (0 if no group of ten shares a node with it)
		global_overlap+=max_jac;
		cout<<"========== "<<global_overlap<<endl;
	}
Exemple #2
0
int main (int argc, char *argv[]) {
	char input_file[MAX_DIR] = MATRIX_DIR; 
	char input_index_file[MAX_DIR] = MATRIX_DIR; 
	char input_matrix_file[MAX_DIR] = MATRIX_DIR; 
	char input_matrix_name[MAX_DIR];
	char tmp_module_file[MAX_DIR] = MATRIX_DIR; 
	char final_module_file[MAX_DIR] = MATRIX_DIR; 
	char output_file[MAX_DIR] = MATRIX_DIR;
	char log_file[MAX_DIR] = MATRIX_DIR;
	char answer_file[MAX_DIR] = MATRIX_DIR;
	char prog[MAX_DIR]=BIN_DIR; 
	char mode, prog_option; 
	int i,j, k, l, num_edges, *pnum_edges, num_modules, *pnum_modules, num_answer_modules, *pnum_answer_modules; 
	ModulePtr modules, module, answer_modules, answer_module; 
	char tmp_file[MAX_DIR] = MATRIX_DIR; 
	double M, final_M; 
	int num_vertices, *pnum_vertices, num_lines; 
	int *degrees, total_edges; 
	int rand_num; 
	FILE *fp, *log; 
	time_t t_0, t_1, t, t_last;
	clock_t clo_0, clo_1, clo_last; 
	int elapTicks; 
	double elapMilli, elapSeconds, elapMinutes;  
	char answer_option; 
	double acc_0, acc_1; 
	double th1_up, th1_low, th2_up, th2_low; 
	char suffix[MAX_ATTR_NAME]; 
	char rand_graph_option; 

	if (argc < 3) { 
		fprintf(stderr, "Too few arguments!\n");
		fprintf(stderr, "Usage:\n");
		fprintf(stderr, "<Program Name> input_matrix (tab delimitted (string or index)) program_option (1: MSG.out; 2: SCNewman; 3: testQcut) answer_option (0: no answer; 1: answer) answer_file\n"); 
		exit(EXIT_FAILURE);
	}

	strcat(input_file, argv[1]); 
	strcpy (input_matrix_name, argv[1]); 
	prog_option = atoi(argv[2]); 
	answer_option = atoi(argv[3]); 
	if (answer_option) { 
		if (argc < 4) { 
			fprintf(stderr, "Missing answer file\n"); 
			fprintf(stderr, "Usage:\n");
			fprintf(stderr, "<Program Name> input_matrix (tab delimitted (string or index)) program_option (1: MSG.out; 2: SCNewman; 3: testQcut) answer_option (0: no answer; 1: answer) answer_file\n");
			exit(EXIT_FAILURE);
		}
		strcat(answer_file, argv[4]); 
	}

	strcat(input_index_file, argv[1]); 
	strcat(input_index_file, ".index"); 
	strcat(input_matrix_file, argv[1]); 
	strcat(input_matrix_file, ".matrix"); 

	strcat(output_file, argv[1]); 
	strcat(output_file, "_result_"); 
	strcat(log_file, argv[1]); 
	strcat(log_file, "_"); 

	strcat(final_module_file, argv[1]); 
	strcat(final_module_file, "_result_"); 

	if (prog_option == 1) { 
		strcat (prog, "MSG.out"); 
		strcat (input_matrix_name, "_MSG.out"); 
		strcat(final_module_file, "iNP_MSG"); 
		strcat(output_file, "MSG"); 
		strcat(log_file, "iNP_MSG.log"); 
	} else if (prog_option == 2) { 
		strcat (prog, "do_SCNewman"); 
		strcat (input_matrix_name, "_SCNewman");
		strcat(final_module_file, "iNP_SCNewman"); 
		strcat(output_file, "SCNewman"); 
		strcat(log_file, "iNP_SCNewman.log"); 
	} else if (prog_option == 3) { 
		strcat (prog, "qcut.pl");
		strcat (input_matrix_name, "_qcut"); 
		strcat(final_module_file, "iNP_Qcut"); 
		strcat(output_file, "Qcut"); 
		strcat(log_file, "iNP_Qcut.log"); 
	} else { 
		fprintf(stderr, "<Program Name> input_matrix (tab delimitted (string or index)) mode (0, 1, 2) program_option (1: MSG.out)\n"); 
		fprintf(stderr, "program_option currently allows only 1 - 3 (1: MSG, 2: SC, 3: Qcut)\n"); 
		exit(EXIT_FAILURE); 
	}
	if ((log = fopen(log_file, "w")) == NULL) {
		fprintf(stderr, "\nFailure to write file %s in read mode\n", log_file);
		fflush(NULL);
		return(-1);
	}

	num_vertices = 0; 
	pnum_vertices = &num_vertices;
	num_lines =	process_input(input_file, input_index_file, input_matrix_file, pnum_vertices); 
	degrees = (int *) calloc(num_vertices, sizeof(int))-1;
	total_edges = read_input_matrix(input_matrix_file, num_vertices, degrees); 

	num_modules = 0; 
	pnum_modules = &num_modules; 
	modules = (ModulePtr) malloc(num_vertices * sizeof(ModuleStr)) - 1; 

	time (&t_0); 
	M = partition_network(input_matrix_file, pnum_modules, modules, prog, prog_option); 
	time (&t_1); 

	if ((fp = fopen(output_file, "w")) == NULL) {
		fprintf(stderr, "\nFailure to write file %s in read mode\n", output_file);
		fflush(NULL);
		return(-1);
	}

/* print networks partitioned without iterations */ 
	for (i = 1; i <= num_modules; i ++) { 
		module = modules + i; 
		for (j = 1; j <= module->num_nodes; j ++) { 
			fprintf(fp, "%s\t", (vertices + module->nodes[j])->name); 
		}
		fprintf(fp, "\n");
	}
	fprintf(fp, "%f\n", M); 
	fclose (fp); 

	final_modules = (ModulePtr) malloc(num_vertices * sizeof(ModuleStr)) - 1; 
	num_final_modules = 0; 
	pnum_final_modules = &num_final_modules; 

	//printf ("%s %f\n", output_file, M); 
	rand_num = (int) rand(); 
	for (i = 1; i <= num_modules; i ++) { 
		module = modules + i; 
		iRun(module, num_vertices, prog, input_matrix_name, prog_option); 
	}
	if (num_final_modules) { 
		final_M = calculate_modularity(num_final_modules, final_modules, degrees, total_edges); 
		/* print networks partitioned with iterations */ 
		print_modules(final_module_file, final_M); 
	}
	time (&t_last); 

	fprintf (log, "NP\tnum_modules\t%d\tmodularity\t%f\ttime\t%f\t", num_modules, M, difftime(t_1, t_0)); 
	if (answer_option) { 
		acc_0 = jaccard(answer_file, output_file); 
		fprintf (log, "acc\t%f\n", acc_0); 
	} else { 
		fprintf (log, "\n"); 
	}
	fprintf (log, "iNP\tnum_modules\t%d\tmodularity\t%f\ttime\t%f\t", num_final_modules, final_M, difftime(t_last, t_0)); 
	if (answer_option) { 
		acc_1 = jaccard(answer_file, final_module_file); 
		fprintf (log, "acc\t%f\n", acc_1); 
	} else { 
		fprintf (log, "\n"); 
	}
	fclose (log); 
	
	remove(input_matrix_file); 
	remove(input_index_file); 

	return 0;
}