Exemple #1
0
/*
 * Memoized recursive evaluation of the coupling value c(i, j) for the point
 * sequences p[0..i] and q[0..j].
 * NOTE(review): the recurrence has the shape of the discrete Frechet
 * distance computation — confirm against the caller.
 *
 * ca   : cache table indexed [i][j]; an entry greater than -1 is treated as
 *        already computed and returned as-is, so the caller is expected to
 *        pre-fill the table with a value <= -1.
 * i, j : indices of the last points considered in p and q.
 * p, q : point sequences handed to euc_dist().
 *
 * Returns the value stored in ca[i][j] after it has been filled in, or
 * DBL_MAX ("infinity") when either index is negative.
 */
double _c(double **ca, int i, int j, __point *p, __point *q) {
    /* A negative index has no cell in ca: answer "infinity" before indexing,
     * instead of reading and writing outside the table. */
    if (i < 0 || j < 0) {
        return DBL_MAX;
    }

    /* Cache hit. */
    if (ca[i][j] > -1) {
        return ca[i][j];
    }

    if (i == 0 && j == 0) {
        /* Base case: only the two first points are coupled. */
        ca[i][j] = euc_dist(p[0], q[0]);
    }
    else if (j == 0) {
        /* First column (i > 0): q stays on its first point. */
        ca[i][j] = max(_c(ca, i - 1, 0, p, q), euc_dist(p[i], q[0]));
    }
    else if (i == 0) {
        /* First row (j > 0): p stays on its first point. */
        ca[i][j] = max(_c(ca, 0, j - 1, p, q), euc_dist(p[0], q[j]));
    }
    else {
        /* General case (i > 0 and j > 0): best of the three predecessors,
         * bounded below by the distance of the current pair. */
        ca[i][j] = max(
            min(
                _c(ca, i - 1, j, p, q),
                _c(ca, i - 1, j - 1, p, q),
                _c(ca, i, j - 1, p, q)
                ),
            euc_dist(p[i], q[j])
            );
    }
    return ca[i][j];
}
Exemple #2
0
/*
 * Distance between list1 and list2 (both of length len), computed as the
 * Euclidean distance scaled by (2 - cosine), where the cosine is taken
 * between the difference vector (list2 - list1) and a reference vector
 * built from DELTAN / DELTAP.
 *
 * len          : number of elements in list1 and list2.
 * list1, list2 : input vectors.
 * axlen        : number of leading positions that receive DELTAN in the
 *                reference vector; the remaining ones receive DELTAP.
 * ax           : not read by this function.
 *                NOTE(review): the inner loop compares i with the loop
 *                counter j rather than with ax[j]; confirm which was meant.
 *
 * Returns r * (2 - cs). Both scratch vectors are released before returning.
 */
float synteny_dist(int len, float *list1, float *list2, int axlen, int *ax){
	int i;
	int j;
	float r = 0;
	float cs = 0;
	float dist = 0;
	float *tmp_base_posi = NULL;
	float *tmp_sample = NULL;
	tmp_base_posi = f_calloc_vec(len);
	tmp_sample = f_calloc_vec(len);
	r = euc_dist(len,list1,list2);
	/* reference vector: DELTAN where i matches some j < axlen, DELTAP otherwise
	   (left as allocated when axlen <= 0) */
	for(i=0;i<len;i++){
		for(j=0;j<axlen;j++){
			tmp_base_posi[i] = DELTAP;
			if(i == j){
				tmp_base_posi[i] = DELTAN;
				break;
			}
		}
	}
	/* difference vector list2 - list1 */
	for(i=0;i<len;i++){
		tmp_sample[i] = list2[i] - list1[i];
	}
	cs = veccos(len,tmp_base_posi,tmp_sample);
	dist = r * (2 - cs);	/*2: parameter*/
	free(tmp_base_posi);
	/* tmp_sample was previously leaked on every call */
	free(tmp_sample);
	return(dist);
}
/*
 * Fill dist_list[k] with euc_dist(dim, list, arr[k]) for every k in
 * [0, num). The loop carries an OpenMP parallel-for directive, so the
 * iterations may be distributed over threads when OpenMP is enabled.
 */
void euc_dist_list(int num, int dim, float **arr, float *list, float *dist_list){
	int row;
	#pragma omp parallel for
	for(row = 0; row < num; ++row){
		float *candidate = arr[row];
		dist_list[row] = euc_dist(dim, list, candidate);
	}
}
Exemple #4
0
/*
 * Fill the strictly lower triangle of dist_table:
 * dist_table[r][c] = euc_dist(dim, arr[r], arr[c]) for 0 <= c < r < num.
 * The diagonal and the upper triangle are not written.
 */
void euc_dist_triangle(int num, int dim, float **arr, float **dist_table){
	int row;
	int col;
	/* row 0 has no entries below the diagonal, so start at row 1 */
	for(row = 1; row < num; row++){
		float *current = arr[row];
		float *out_row = dist_table[row];
		col = 0;
		while(col < row){
			out_row[col] = euc_dist(dim, current, arr[col]);
			col++;
		}
	}
}
Exemple #5
0
/*
 * Fill the full num1 x num2 table:
 * dist_table[r][c] = euc_dist(dim, arr1[r], arr2[c]).
 */
void euc_dist_table(int num1, float **arr1, int num2, float **arr2, int dim, float **dist_table){
	int r;
	int c;
	/* nothing to write when the second set is empty */
	if(num2 <= 0){
		return;
	}
	for(r = 0; r < num1; r++){
		float *lhs = arr1[r];
		float *out_row = dist_table[r];
		c = 0;
		while(c < num2){
			out_row[c] = euc_dist(dim, lhs, arr2[c]);
			c++;
		}
	}
}
/*
 * Fill the strictly lower triangle of dist_table:
 * dist_table[l][m] = euc_dist(dim, arr[l], arr[m]) for 0 <= m < l < num.
 * The outer loop carries an OpenMP parallel-for directive.
 *
 * The inner counter m is declared outside the parallel region, so it must be
 * listed as private: with shared(m) every thread would increment the same
 * counter, which is a data race and leaves table entries unwritten or
 * written with the wrong column.
 */
void euc_dist_triangle(int num, int dim, float **arr, float **dist_table){
	int l;
	int m;
	#pragma omp parallel for private(m)
	for(l=0;l<num;l++){
		for(m=0;m<l;m++){
			dist_table[l][m] = euc_dist(dim,arr[l],arr[m]);
		}
	}
}
Exemple #7
0
/*
 * Serial variant: store euc_dist(dim, list, arr[k]) into dist_list[k] for
 * every k in [0, num).
 */
void euc_dist_list(int num, int dim, float **arr, float *list, float *dist_list){
	int k = 0;
	while(k < num){
		dist_list[k] = euc_dist(dim, list, arr[k]);
		k++;
	}
}
Exemple #8
0
/*
 * Command-line entry point: reads a sample table from sample_file, derives
 * num_clst coordinate rows according to coordinate_option and writes them to
 * cluster_file.
 *
 * Argument handling:
 *   argc == 2 : print message and usage (plus the option list for "-h").
 *   argc >= 4 : parse options with get_options() and run.
 *   otherwise : print message and usage.
 *
 * Output format: a header line "<dim_data> <num_clst>" followed by one line
 * per row, each value printed with "%f ".
 *
 * Most of the state used here (sample_file, cluster_file, coordinate_option,
 * num_clst, seed, ordered_output, data_matrix, id_distance, ...) is declared
 * outside this function; presumably it is filled in by get_options() —
 * confirm there.
 *
 * Returns 0; terminates with exit(1) on any detected error.
 */
int main(int argc, char **argv){
	float **shake_dim_min_to_max_matrix = NULL;	/* N*dim */
	FILE *ifp = NULL;
	FILE *ofp = NULL;
	int with_sample_ID = 1;
	int arg_stat = 0;
	int i = 0;
	int j = 0;
	char dim_string[TMP_STRING_LEN];
	int grid_tensor_elms = 1;
	int grid_tensor_rank = 1;
	int *grid_tensor_dim = NULL;
	int **tensor_position_matrix = NULL;
	float **tensor_vars = NULL;

	/* temporary vars */
	int tmp_ptr_arr = 0;
	int tmp_ptr_word = 0;
	char *tmp_word;
	tmp_word = c_alloc_vec(SHORT_STRING_LEN);

	if(argc == 2){
		if(strcmp(argv[1],"-h") == 0){
			message();
			usage();
			options();
		}else{
			message();
			usage();
		}
	}else if(argc >= 4){
		arg_stat = get_options(1,argc,argv);
		/* bits 2, 4 and 16 of the status must all be set;
		   presumably these mark the mandatory options — confirm in get_options() */
		if((arg_stat&22) != 22){
			usage();
			exit(1);
		}
		/* (* check file format */
		int max_col = 0;
		int min_col = 0;
		int lines = 0;
		if((ifp = fopen(sample_file,"r")) == NULL){
			perror(sample_file);
			exit(1);
		}
		read_ilist_from_stream(1,ifp,&dim_data);
		read_ilist_from_stream(1,ifp,&num_data);
		fgetc(ifp);
		count_column_from_stream(ifp,&lines,&max_col,&min_col);
		fclose(ifp);
		fprintf(stderr,"1st line information: dim:%d: num:%d:\n",dim_data,num_data);
		fprintf(stderr,"count: max:%d: min:%d: lines:%d:\n",max_col,min_col,lines);
		if(max_col != min_col){
			fprintf(stderr,"[EROOR] something wrong: max_col not min_col.\n");
			exit(1);
		}
		/* one extra column means every row starts with a sample ID;
		   any other column count keeps the default with_sample_ID = 1 */
		if(dim_data == max_col){
			fprintf(stderr,"dim_data == num of columns: with_sample_ID = 0.\n");
			with_sample_ID = 0;
		}else if((dim_data + 1) == max_col){
			fprintf(stderr,"dim_data + 1 == num of columns: with_sample_ID = 1.\n");
			with_sample_ID = 1;
		}
		/* *) */
		/* (* scan data, allocate mem, read data */
		if((ifp = fopen(sample_file,"r")) == NULL){
			perror(sample_file);
			exit(1);
		}
		read_ilist_from_stream(1,ifp,&dim_data);
		read_ilist_from_stream(1,ifp,&num_data);
		data_matrix = f_alloc_mat(num_data,dim_data);
		sample_id = c_alloc_mat(num_data,ID_LEN);
		if(with_sample_ID == 1){
			read_ID_ftable_from_stream(num_data,dim_data,ifp,data_matrix,sample_id);
		}else if(with_sample_ID == 0){
			read_ftable_from_stream(num_data,dim_data,ifp,data_matrix);
		}
		fclose(ifp);
		dim_clst = dim_data;
		dim_min_and_max_table = f_alloc_mat(2,dim_data);
		/* *) */
		/* (* set min_dim_list to dim_min_and_max_table[0] */
		min_list_from_matrix(dim_data,num_data,data_matrix,dim_min_and_max_table[0],0);
		/* *) */
		/* (* set max_dim_list to dim_min_and_max_table[1] */
		max_list_from_matrix(dim_data,num_data,data_matrix,dim_min_and_max_table[1],0);
		/* *) */
		/* (* set ticks to dim_min_to_max_matrix */
		dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data);
		make_min_to_max_table(num_clst,dim_data,dim_min_and_max_table,dim_min_to_max_matrix);
		/* *) */
		/* (* change operation by coordinate_option */
		if((strcmp(coordinate_option,"Diagonal") == 0)||(strcmp(coordinate_option,"-D") == 0)){
			/* (* copy pointer for write to file */
			out_matrix = dim_min_to_max_matrix;
			/* *) */


		}else if(strcmp(coordinate_option,"random=Order") == 0){
			/* (* generate random order */
			int_rand_order_matrix = i_alloc_mat(dim_data,num_clst);
			t = time(NULL);
			for(i=0;i<dim_data;i++){
				srand(t+33*i);
				mk_int_rand_order_list(0,num_clst-1,int_rand_order_matrix[i],t+pow(5,i)+(13*i)+seed);
			}
			/* *) */
			/* (* set shake_dim_min_to_max_matrix from random_order & min_max_matrix */
			shake_dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data);
			for(j=0;j<num_clst;j++){
				for(i=0;i<dim_data;i++){
					shake_dim_min_to_max_matrix[j][i] = dim_min_to_max_matrix[int_rand_order_matrix[i][j]][i];
				}
			}
			/* *) */
			/* (* copy pointer for write to file */
			out_matrix = shake_dim_min_to_max_matrix;
			/* *) */


		}else if(strcmp(coordinate_option,"random=Value") == 0){
			t = time(NULL);
			dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data);
			mk_rand_coordinate(num_clst,dim_data,dim_min_and_max_table,dim_min_to_max_matrix,t+seed);
			out_matrix = dim_min_to_max_matrix;


		}else if(strcmp(coordinate_option,"node=Central") == 0){
			/* check num */
			if(num_data < num_clst){
				printf("%d,%d\n",num_data,num_clst);
				fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n");
				exit(1);
			}

			/* init id_distance */
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = 0;
			}

			/* set centroid */
			centroid = f_calloc_vec(dim_data);
			for(i=0;i<num_data;i++){
				for(j=0;j<dim_data;j++){
					centroid[j] += data_matrix[i][j];
				}
			}
			for(j=0;j<dim_data;j++){
				centroid[j] = centroid[j]/num_data;
			}

			/* set id_distance */
			for(i=0;i<num_data;i++){
				id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]);
			}

			/* sort id_distance by distance */
			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist);
			out_matrix = data_matrix;


		}else if(strcmp(coordinate_option,"node=diStant") == 0){
			if(num_data < num_clst){
				fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n");
				exit(1);
			}
			/* distance of every sample from the all-zero origin vector */
			origin = f_calloc_vec(dim_data);
			origin_data_dist_list = f_alloc_vec(num_data);
			for(i=0;i<num_data;i++){
				origin_data_dist_list[i] = euc_dist(dim_data,origin,data_matrix[i]);
			}
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = origin_data_dist_list[i];
			}
			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist);
			out_matrix = data_matrix;


		}else if(strcmp(coordinate_option,"node=Power") == 0){
			if(num_data < num_clst){
				fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n");
				exit(1);
			}
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = 0;
			}
			/* rank samples by the sum of their components */
			for(i=0;i<num_data;i++){
				for(j=0;j<dim_data;j++){
					id_distance[i].value = id_distance[i].value + data_matrix[i][j];
				}
			}
			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist);
			out_matrix = data_matrix;


		}else if(strcmp(coordinate_option,"node=outLying") == 0){
			if(num_data < num_clst){
				fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n");
				exit(1);
			}
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = 0;
			}

			/* same centroid distance as node=Central, sorted with alt_cmp_dist */
			centroid = f_calloc_vec(dim_data);
			for(i=0;i<num_data;i++){
				for(j=0;j<dim_data;j++){
					centroid[j] += data_matrix[i][j];
				}
			}
			for(j=0;j<dim_data;j++){
				centroid[j] = centroid[j]/num_data;
			}
			for(i=0;i<num_data;i++){
				id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]);
			}

			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist);
			out_matrix = data_matrix;


		}else if(strcmp(coordinate_option,"node=Intensive") == 0){
			if(num_data < num_clst){
				fprintf(stderr,"Warn : number of cluster exceeded number of samples.\n");
				exit(1);
			}
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = 0;
			}
			/* the two passes together add the distance from sample i to every sample */
			for(i=0;i<num_data;i++){
				for(j=0;j<=i;j++){
					id_distance[i].value += euc_dist(dim_data,data_matrix[i],data_matrix[j]);
				}
			}
			for(i=0;i<num_data;i++){
				for(j=num_data-1;j>i;j--){
					id_distance[i].value += euc_dist(dim_data,data_matrix[j],data_matrix[i]);
				}
			}
			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist);
			out_matrix = data_matrix;


		}else if(strncmp(coordinate_option,"node=Median",11) == 0){
			double delta = 0;
			int data_ptr = 0;
			int bin_ptr = 0;
			int total_bin_count = 0;
			/* check num */
			if(num_data < num_clst){
				printf("%d,%d\n",num_data,num_clst);
				fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n");
				exit(1);
			}
			/* num_clst is used as a divisor below */
			if(num_clst <= 0){
				fprintf(stderr,"[E] number of clusters must be positive.\n");
				exit(1);
			}

			/* init id_distance */
			id_distance = status_alloc_list(num_data);
			for(i=0;i<num_data;i++){
				id_distance[i].pos = i;
				id_distance[i].value = 0;
			}

			/* set centroid */
			centroid = f_calloc_vec(dim_data);
			for(i=0;i<num_data;i++){
				for(j=0;j<dim_data;j++){
					centroid[j] += data_matrix[i][j];
				}
			}
			for(j=0;j<dim_data;j++){
				centroid[j] = centroid[j]/num_data;
			}

			/* set id_distance */
			for(i=0;i<num_data;i++){
				id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]);
			}


			/* sort id_distance by distance */
			qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist);
			max_dist = id_distance[num_data-1].value;
			min_dist = id_distance[0].value;
			fprintf(stderr,"max_dist:%f:\n",max_dist);
			fprintf(stderr,"min_dist:%f:\n",min_dist);
			out_matrix = data_matrix;

			/* set partition */
			partition = (int)num_data/num_clst;
			fprintf(stderr,"partition:%d:\n",partition);
			parted_values = f_calloc_vec(partition);
			delta = (max_dist - min_dist)/partition;
			fprintf(stderr,"delta:%f:\n",delta);
			/* parted_values[k] is the lower boundary of bin k */
			for(i=0;i<partition;i++){
				parted_values[i] = min_dist + (i * delta);
			}

			/* (* UNDER CHECK */
			bins = i_calloc_vec(partition);
			data_ptr = 0;
			bin_ptr = 0;
			total_bin_count = 0;
			median_count = 0;
			median_value = 0;
			ordered_start = 0;
			/* NOTE(review): a sample that triggers the move to the next bin is
			   not counted in any bin, and the last bin visited is never compared
			   against median_count — confirm whether that is intended. */
			for(i=0;i<num_data;i++){
				if(bin_ptr >= partition){
					break;
				}
				/* the last bin has no upper boundary stored in parted_values,
				   so it takes every remaining sample instead of reading
				   parted_values[partition] past the end of the vector */
				if((bin_ptr + 1 >= partition)||(id_distance[i].value <= parted_values[bin_ptr+1])){
					total_bin_count++;
					bins[bin_ptr]++;
				}else{
					if(bins[bin_ptr] > median_count){
						median_count = bins[bin_ptr];
						median_value = parted_values[bin_ptr];
						ordered_start = total_bin_count - bins[bin_ptr];
					}
					bin_ptr++;
				}
			}
			/* bin_ptr stays 0 when every sample fell into the first bin;
			   there is no previous bin to update in that case */
			if(bin_ptr > 0){
				bins[bin_ptr-1] = (num_data - total_bin_count);
			}
			/* *) */


		}else if(strcmp(coordinate_option,"Through") == 0){
			/* pass the samples through unchanged */
			num_clst = num_data;
			out_matrix = data_matrix;


		}else if(strncmp(coordinate_option,"Grid=",5) == 0){
			/* "%s" is unbounded: reject an argument that cannot fit in dim_string */
			if(strlen(coordinate_option + 5) >= sizeof(dim_string)){
				fprintf(stderr,"[E] Grid= argument is too long.\n");
				exit(1);
			}
			/* sscanf() stores nothing when the argument is empty */
			dim_string[0] = '\0';
			sscanf(coordinate_option,"Grid=%s",dim_string);
			grid_tensor_rank = dim_data;
			grid_tensor_dim = get_tensor_dimANDrank(grid_tensor_rank,dim_string);
			tensor_position_matrix = make_outer_tensor_position_matrix(dim_data,&grid_tensor_elms,grid_tensor_dim);
			num_clst = grid_tensor_elms;
			if((tensor_vars = malloc((size_t)sizeof(float*)*grid_tensor_rank)) == NULL){
				printf("failed : malloc() at %ld byte.\n",(long int)sizeof(float*)*grid_tensor_rank);
				exit(1);
			}
			/* one min-to-max tick list per axis */
			for(i=0;i<grid_tensor_rank;i++){
				tensor_vars[i] = make_min_to_max_list(grid_tensor_dim[i],dim_min_and_max_table[0][i],dim_min_and_max_table[1][i]);
			}
			out_matrix = f_alloc_mat(grid_tensor_elms,grid_tensor_rank);
			for(i=0;i<grid_tensor_elms;i++){
				for(j=0;j<dim_data;j++){
					out_matrix[i][j] = tensor_vars[j][tensor_position_matrix[i][j]];
				}
			}


		}else if(strncmp(coordinate_option,"Axis-mean=",10) == 0){
			int axis_str_len = 0;
			int axis_list_len = 0;
			int *axis_list = NULL;
			double *axis_v_list = NULL;
			/* NOTE(review): axis_str is declared outside this function and its
			   capacity is not visible here; "%s" is unbounded — confirm it can
			   hold the longest accepted option string. */
			sscanf(coordinate_option,"Axis-mean=%s",axis_str);
			axis_str_len = strlen(axis_str);
			/* number of comma separated entries = commas + 1 */
			for(i=0;i<axis_str_len;i++){
				if(axis_str[i] == ','){
					axis_list_len++;
				}
			}
			axis_list_len++;
			axis_list = i_alloc_vec(axis_list_len);
			axis_v_list = d_calloc_vec(axis_list_len);
			/* split on ',' and convert every token to an axis index */
			for(i=0;i<axis_str_len;i++){
				if(axis_str[i] == ','){
					tmp_word[tmp_ptr_word] = '\0';
					if(sscanf(tmp_word,"%d",&axis_list[tmp_ptr_arr]) != 1){
						fprintf(stderr,"[E] Axis-mean=: invalid axis index '%s'.\n",tmp_word);
						exit(1);
					}
					tmp_ptr_arr++;
					tmp_ptr_word = 0;
				}else{
					/* tmp_word was requested with SHORT_STRING_LEN chars:
					   keep room for the terminator */
					if(tmp_ptr_word >= SHORT_STRING_LEN - 1){
						fprintf(stderr,"[E] Axis-mean=: axis index is too long.\n");
						exit(1);
					}
					tmp_word[tmp_ptr_word] = axis_str[i];
					tmp_ptr_word++;
				}
			}
			tmp_word[tmp_ptr_word] = '\0';
			if(sscanf(tmp_word,"%d",&axis_list[tmp_ptr_arr]) != 1){
				fprintf(stderr,"[E] Axis-mean=: invalid axis index '%s'.\n",tmp_word);
				exit(1);
			}
			tmp_ptr_arr++;
			tmp_ptr_word = 0;
			/* every axis is used as a column index of data_matrix and out_matrix */
			for(j=0;j<axis_list_len;j++){
				if((axis_list[j] < 0)||(axis_list[j] >= dim_data)){
					fprintf(stderr,"[E] Axis-mean=: axis %d is out of range (dim:%d).\n",axis_list[j],dim_data);
					exit(1);
				}
			}
			/* num_clst */
			num_clst = axis_list_len;
			/* out_matrix alloc */
			out_matrix = f_calloc_mat(num_clst,dim_data);
			/* calculation */
			for(i=0;i<num_data;i++){
				for(j=0;j<axis_list_len;j++){
					axis_v_list[j] += data_matrix[i][axis_list[j]];
				}
			}
			/* row j holds the mean of axis j on that axis; other cells stay as allocated */
			for(j=0;j<axis_list_len;j++){
				axis_v_list[j] = axis_v_list[j]/num_data;
				out_matrix[j][axis_list[j]] = (float)axis_v_list[j];
			}

		}else if(strncmp(coordinate_option,"aXis-intensive=",15) == 0){
			/* declare and allocate */
			float axis_ratio = 1;
			struct st_ax_and_mean *ax_mean;
			/* allocation failure is an error: exit with a non-zero status */
			if(((ax_mean = malloc(sizeof(struct st_ax_and_mean) * dim_data)) == NULL)){ fprintf(stderr, "[E] failed: malloc().\n"); exit(1); }
			out_matrix = f_calloc_mat(dim_data,dim_data);
			/* ratio */
			sscanf(coordinate_option,"aXis-intensive=%f",&axis_ratio);
			mat_TO_st_ax_and_mean(dim_data, num_data, data_matrix, ax_mean);
			qsort(ax_mean,dim_data,sizeof(struct st_ax_and_mean),(int(*)(const void*, const void*))cmp_ax_and_mean);
			/* NOTE(review): dim_data rows are filled here but the writer below
			   emits num_clst rows — confirm num_clst is expected to equal dim_data */
			for(j=0;j<dim_data;j++){
				out_matrix[j][ax_mean[j].ax] = axis_ratio * ax_mean[j].mean;
			}


		}else{
			options();
			exit(1);
		}
		/* *) */
		/* ordered output reads id_distance[ordered_start .. ordered_start+num_clst-1];
		   id_distance is only allocated (with num_data entries) by the node=* options */
		if(ordered_output != 0){
			if(id_distance == NULL){
				fprintf(stderr,"[E] ordered output requires a node=* coordinate option.\n");
				exit(1);
			}
			if((ordered_start < 0)||(ordered_start + num_clst > num_data)){
				fprintf(stderr,"[E] ordered output range exceeds number of samples: start:%d: clusters:%d: samples:%d:\n",ordered_start,num_clst,num_data);
				exit(1);
			}
		}
		/* (* open file and write shake_dim_min_to_max_matrix */
		ofp = fopen(cluster_file,"w");
		if(ofp == NULL){
			perror(cluster_file);
			exit(1);
		}
		if(ordered_output == 0){
			/* rows of out_matrix in storage order */
			fprintf(ofp,"%d %d\n",dim_data,num_clst);
			for(j=0;j<num_clst;j++){
				for(i=0;i<dim_data;i++){
					fprintf(ofp,"%f ",out_matrix[j][i]);
				}
				fprintf(ofp,"\n");
			}
			fclose(ofp);
		}else{
			/* rows of out_matrix selected through the sorted id_distance list */
			fprintf(ofp,"%d %d\n",dim_data,num_clst);
			for(j=ordered_start;j<num_clst+ordered_start;j++){
				for(i=0;i<dim_data;i++){
					fprintf(ofp,"%f ",out_matrix[id_distance[j].pos][i]);
				}
				fprintf(ofp,"\n");
			}
			fclose(ofp);
		}
		/* *) */

	}else{
		message();
		usage();
	}
	return(0);
}