/*
 * Memoised recursive evaluation of the coupling recurrence
 *
 *   c(0,0) = d(p[0], q[0])
 *   c(i,0) = max(c(i-1,0), d(p[i], q[0]))
 *   c(0,j) = max(c(0,j-1), d(p[0], q[j]))
 *   c(i,j) = max(min(c(i-1,j), c(i-1,j-1), c(i,j-1)), d(p[i], q[j]))
 *
 * where d() is euc_dist().  This is the recurrence of the discrete
 * Frechet (coupling) distance between the point sequences p and q.
 *
 * ca   : memo table, indexed ca[i][j].  An entry greater than -1 is treated
 *        as already computed, so the caller must pre-fill the table with a
 *        value <= -1 before the first call.
 * i, j : last index into p and q respectively.
 * p, q : the two point sequences.
 *
 * Returns c(i,j) and stores it in ca[i][j].  For a negative index DBL_MAX is
 * returned and the table is left untouched.
 */
double _c(double **ca, int i, int j, __point *p, __point *q)
{
    double here;

    /* Validate the indices before the memo lookup: the original looked at
     * ca[i][j] first, which is an out-of-bounds read for negative indices. */
    if (i < 0 || j < 0) {
        return DBL_MAX;
    }

    if (ca[i][j] > -1) {
        return ca[i][j];
    }

    /* Results are stored in locals before being combined so that min/max,
     * should they be macros, cannot evaluate a recursive call twice. */
    here = euc_dist(p[i], q[j]);

    if (i == 0 && j == 0) {
        ca[i][j] = here;
    } else if (j == 0) {
        double up = _c(ca, i - 1, 0, p, q);

        ca[i][j] = max(up, here);
    } else if (i == 0) {
        double left = _c(ca, 0, j - 1, p, q);

        ca[i][j] = max(left, here);
    } else {
        double up = _c(ca, i - 1, j, p, q);
        double diag = _c(ca, i - 1, j - 1, p, q);
        double left = _c(ca, i, j - 1, p, q);
        double best = min(up, diag, left);

        ca[i][j] = max(best, here);
    }

    return ca[i][j];
}
float synteny_dist(int len, float *list1, float *list2, int axlen, int *ax){ //extern int *syn_negax; int i; int j; float r = 0; float cs = 0; float dist = 0; float *tmp_base_posi = NULL; //float *tmp_base_nega = NULL; float *tmp_sample = NULL; tmp_base_posi = f_calloc_vec(len); //tmp_base_nega = f_calloc_vec(len); tmp_sample = f_calloc_vec(len); r = euc_dist(len,list1,list2); for(i=0;i<len;i++){ for(j=0;j<axlen;j++){ tmp_base_posi[i] = DELTAP; if(i == j){ tmp_base_posi[i] = DELTAN; break; } } } for(i=0;i<len;i++){ tmp_sample[i] = list2[i] - list1[i]; } cs = veccos(len,tmp_base_posi,tmp_sample); dist = r * (2 - cs); /*2: parameter*/ free(tmp_base_posi); return(dist); }
/*
 * Fill dist_list[k] with euc_dist(dim, list, arr[k]) for k = 0 .. num-1.
 * The iterations are independent and are distributed over OpenMP threads.
 *
 * num       : number of rows in arr and of entries written to dist_list.
 * dim       : length handed to euc_dist() for every vector.
 * arr       : rows to compare against list.
 * list      : reference vector.
 * dist_list : output, one value per row of arr.
 */
void euc_dist_list(int num, int dim, float **arr, float *list, float *dist_list)
{
    int row;

#pragma omp parallel for
    for (row = 0; row < num; ++row) {
        float *candidate = arr[row];

        dist_list[row] = euc_dist(dim, list, candidate);
    }
}
/*
 * Fill the strictly lower triangle of dist_table:
 * dist_table[r][c] = euc_dist(dim, arr[r], arr[c]) for every c < r < num.
 * The diagonal and the upper triangle are not touched.
 *
 * num        : number of rows in arr.
 * dim        : length handed to euc_dist() for every vector.
 * arr        : input rows.
 * dist_table : output table; only entries below the diagonal are written.
 */
void euc_dist_triangle(int num, int dim, float **arr, float **dist_table)
{
    /* Row 0 has nothing below the diagonal, so start at row 1. */
    for (int row = 1; row < num; ++row) {
        float *out = dist_table[row];
        float *current = arr[row];

        for (int col = 0; col < row; ++col) {
            out[col] = euc_dist(dim, current, arr[col]);
        }
    }
}
/*
 * Fill the full cross table between two sets of vectors:
 * dist_table[r][c] = euc_dist(dim, arr1[r], arr2[c])
 * for r = 0 .. num1-1 and c = 0 .. num2-1.
 *
 * num1, arr1 : number of rows and rows of the first set.
 * num2, arr2 : number of rows and rows of the second set.
 * dim        : length handed to euc_dist() for every vector.
 * dist_table : output table, written row by row.
 */
void euc_dist_table(int num1, float **arr1, int num2, float **arr2, int dim, float **dist_table)
{
    /* Nothing to compute when either set is empty. */
    if (num1 <= 0 || num2 <= 0) {
        return;
    }

    for (int row = 0; row < num1; ++row) {
        float *from = arr1[row];
        float *out = dist_table[row];

        for (int col = 0; col < num2; ++col) {
            out[col] = euc_dist(dim, from, arr2[col]);
        }
    }
}
/*
 * Fill the strictly lower triangle of dist_table in parallel:
 * dist_table[l][m] = euc_dist(dim, arr[l], arr[m]) for every m < l < num.
 * The diagonal and the upper triangle are not touched.
 *
 * num        : number of rows in arr.
 * dim        : length handed to euc_dist() for every vector.
 * arr        : input rows.
 * dist_table : output table; only entries below the diagonal are written.
 *
 * Rows are distributed over OpenMP threads.  The inner counter is declared
 * inside the parallel loop body so that every thread owns its own copy; the
 * original `shared(m)` clause made all threads increment a single counter,
 * which is a data race and leaves table cells skipped or computed twice.
 */
void euc_dist_triangle(int num, int dim, float **arr, float **dist_table){
    int l;

#pragma omp parallel for
    for(l=0;l<num;l++){
        int m; /* private to the executing thread */

        for(m=0;m<l;m++){
            dist_table[l][m] = euc_dist(dim,arr[l],arr[m]);
        }
    }
}
/*
 * Fill dist_list[k] with euc_dist(dim, list, arr[k]) for k = 0 .. num-1,
 * in ascending order of k.
 *
 * num       : number of rows in arr and of entries written to dist_list.
 * dim       : length handed to euc_dist() for every vector.
 * arr       : rows to compare against list.
 * list      : reference vector.
 * dist_list : output, one value per row of arr.
 */
void euc_dist_list(int num, int dim, float **arr, float *list, float *dist_list)
{
    for (int row = 0; row < num; ++row) {
        float *candidate = arr[row];

        dist_list[row] = euc_dist(dim, list, candidate);
    }
}
int main(int argc, char **argv){ float **shake_dim_min_to_max_matrix = NULL; /* N*dim */ FILE *ifp = NULL; FILE *ofp = NULL; int with_sample_ID = 1; int arg_stat = 0; int i = 0; int j = 0; char dim_string[TMP_STRING_LEN]; int grid_tensor_elms = 1; int grid_tensor_rank = 1; int *grid_tensor_dim = NULL; int **tensor_position_matrix = NULL; float **tensor_vars = NULL; /* temporary vars */ int tmp_ptr_arr = 0; int tmp_ptr_word = 0; char *tmp_word; tmp_word = c_alloc_vec(SHORT_STRING_LEN); if(argc == 2){ if(strcmp(argv[1],"-h") == 0){ message(); usage(); options(); }else{ message(); usage(); } }else if(argc >= 4){ arg_stat = get_options(1,argc,argv); if((arg_stat&22) != 22){ usage(); exit(1); } /* (* check file format */ int max_col = 0; int min_col = 0; int lines = 0; if((ifp = fopen(sample_file,"r")) == NULL){ perror(sample_file); exit(1); } read_ilist_from_stream(1,ifp,&dim_data); read_ilist_from_stream(1,ifp,&num_data); fgetc(ifp); count_column_from_stream(ifp,&lines,&max_col,&min_col); fclose(ifp); fprintf(stderr,"1st line information: dim:%d: num:%d:\n",dim_data,num_data); fprintf(stderr,"count: max:%d: min:%d: lines:%d:\n",max_col,min_col,lines); if(max_col != min_col){ fprintf(stderr,"[EROOR] something wrong: max_col not min_col.\n"); exit(1); } if(dim_data == max_col){ fprintf(stderr,"dim_data == num of columns: with_sample_ID = 0.\n"); with_sample_ID = 0; }else if((dim_data + 1) == max_col){ fprintf(stderr,"dim_data + 1 == num of columns: with_sample_ID = 1.\n"); with_sample_ID = 1; } /* *) */ /* (* scan data, allocate mem, read data */ if((ifp = fopen(sample_file,"r")) == NULL){ perror(sample_file); exit(1); } read_ilist_from_stream(1,ifp,&dim_data); read_ilist_from_stream(1,ifp,&num_data); data_matrix = f_alloc_mat(num_data,dim_data); sample_id = c_alloc_mat(num_data,ID_LEN); if(with_sample_ID == 1){ read_ID_ftable_from_stream(num_data,dim_data,ifp,data_matrix,sample_id); }else if(with_sample_ID == 0){ 
read_ftable_from_stream(num_data,dim_data,ifp,data_matrix); } fclose(ifp); dim_clst = dim_data; dim_min_and_max_table = f_alloc_mat(2,dim_data); /* *) */ /* (* set min_dim_list to dim_min_and_max_table[0] */ min_list_from_matrix(dim_data,num_data,data_matrix,dim_min_and_max_table[0],0); /* *) */ /* (* set max_dim_list to dim_min_and_max_table[1] */ max_list_from_matrix(dim_data,num_data,data_matrix,dim_min_and_max_table[1],0); /* *) */ /* (* set ticks to dim_min_to_max_matrix */ dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data); make_min_to_max_table(num_clst,dim_data,dim_min_and_max_table,dim_min_to_max_matrix); /* *) */ /* (* change operation by coordinate_option */ if((strcmp(coordinate_option,"Diagonal") == 0)||(strcmp(coordinate_option,"-D") == 0)){ /* (* copy pointer for write to file */ out_matrix = dim_min_to_max_matrix; /* *) */ }else if(strcmp(coordinate_option,"random=Order") == 0){ /* (* generate random order */ int_rand_order_matrix = i_alloc_mat(dim_data,num_clst); t = time(NULL); for(i=0;i<dim_data;i++){ srand(t+33*i); mk_int_rand_order_list(0,num_clst-1,int_rand_order_matrix[i],t+pow(5,i)+(13*i)+seed); } /* *) */ /* (* set shake_dim_min_to_max_matrix from random_order & min_max_matrix */ shake_dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data); for(j=0;j<num_clst;j++){ for(i=0;i<dim_data;i++){ shake_dim_min_to_max_matrix[j][i] = dim_min_to_max_matrix[int_rand_order_matrix[i][j]][i]; } } /* *) */ /* (* copy pointer for write to file */ out_matrix = shake_dim_min_to_max_matrix; /* *) */ }else if(strcmp(coordinate_option,"random=Value") == 0){ t = time(NULL); dim_min_to_max_matrix = f_alloc_mat(num_clst,dim_data); mk_rand_coordinate(num_clst,dim_data,dim_min_and_max_table,dim_min_to_max_matrix,t+seed); out_matrix = dim_min_to_max_matrix; }else if(strcmp(coordinate_option,"node=Central") == 0){ /* check num */ if(num_data < num_clst){ printf("%d,%d\n",num_data,num_clst); fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n"); 
exit(1); } /* init id_distance */ id_distance = status_alloc_list(num_data); for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = 0; } /* set centroid */ centroid = f_calloc_vec(dim_data); for(i=0;i<num_data;i++){ for(j=0;j<dim_data;j++){ centroid[j] += data_matrix[i][j]; } } for(j=0;j<dim_data;j++){ centroid[j] = centroid[j]/num_data; } /* set id_distance */ for(i=0;i<num_data;i++){ id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]); } /* sort id_distance by distance */ qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist); out_matrix = data_matrix; }else if(strcmp(coordinate_option,"node=diStant") == 0){ if(num_data < num_clst){ fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n"); exit(1); } origin = f_calloc_vec(dim_data); origin_data_dist_list = f_alloc_vec(num_data); for(i=0;i<num_data;i++){ origin_data_dist_list[i] = euc_dist(dim_data,origin,data_matrix[i]); } id_distance = status_alloc_list(num_data); for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = origin_data_dist_list[i]; } qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist); out_matrix = data_matrix; }else if(strcmp(coordinate_option,"node=Power") == 0){ if(num_data < num_clst){ fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n"); exit(1); } id_distance = status_alloc_list(num_data); for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = 0; } for(i=0;i<num_data;i++){ for(j=0;j<dim_data;j++){ id_distance[i].value = id_distance[i].value + data_matrix[i][j]; } } qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist); out_matrix = data_matrix; }else if(strcmp(coordinate_option,"node=outLying") == 0){ if(num_data < num_clst){ fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n"); exit(1); } id_distance = status_alloc_list(num_data); 
for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = 0; } centroid = f_calloc_vec(dim_data); for(i=0;i<num_data;i++){ for(j=0;j<dim_data;j++){ centroid[j] += data_matrix[i][j]; } } for(j=0;j<dim_data;j++){ centroid[j] = centroid[j]/num_data; } for(i=0;i<num_data;i++){ id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]); } qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))alt_cmp_dist); out_matrix = data_matrix; }else if(strcmp(coordinate_option,"node=Intensive") == 0){ if(num_data < num_clst){ fprintf(stderr,"Warn : number of cluster exceeded number of samples.\n"); exit(1); } id_distance = status_alloc_list(num_data); for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = 0; } for(i=0;i<num_data;i++){ for(j=0;j<=i;j++){ id_distance[i].value += euc_dist(dim_data,data_matrix[i],data_matrix[j]); } } for(i=0;i<num_data;i++){ for(j=num_data-1;j>i;j--){ id_distance[i].value += euc_dist(dim_data,data_matrix[j],data_matrix[i]); } } qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist); out_matrix = data_matrix; }else if(strncmp(coordinate_option,"node=Median",11) == 0){ //fprintf(stderr,"HOGE!!\n"); double delta = 0; int data_ptr = 0; int bin_ptr = 0; int total_bin_count = 0; //int tmp = 0; /* check num */ if(num_data < num_clst){ printf("%d,%d\n",num_data,num_clst); fprintf(stderr,"Warn : number of clusters exceeded number of samples.\n"); exit(1); } /* init id_distance */ id_distance = status_alloc_list(num_data); for(i=0;i<num_data;i++){ id_distance[i].pos = i; id_distance[i].value = 0; } /* set centroid */ centroid = f_calloc_vec(dim_data); for(i=0;i<num_data;i++){ for(j=0;j<dim_data;j++){ centroid[j] += data_matrix[i][j]; } } for(j=0;j<dim_data;j++){ centroid[j] = centroid[j]/num_data; } /* set id_distance */ for(i=0;i<num_data;i++){ id_distance[i].value = euc_dist(dim_data,centroid,data_matrix[i]); } /* sort id_distance by distance */ 
qsort(id_distance,num_data,sizeof(struct status),(int(*)(const void*, const void*))cmp_dist); max_dist = id_distance[num_data-1].value; min_dist = id_distance[0].value; fprintf(stderr,"max_dist:%f:\n",max_dist); fprintf(stderr,"min_dist:%f:\n",min_dist); out_matrix = data_matrix; /* set partition */ partition = (int)num_data/num_clst; fprintf(stderr,"partition:%d:\n",partition); parted_values = f_calloc_vec(partition); delta = (max_dist - min_dist)/partition; fprintf(stderr,"delta:%f:\n",delta); for(i=0;i<partition;i++){ parted_values[i] = min_dist + (i * delta); //fprintf(stderr,"%f\n",parted_values[i]); } /* (* UNDER CHECK */ bins = i_calloc_vec(partition); data_ptr = 0; bin_ptr = 0; total_bin_count = 0; //median_range = 0; median_count = 0; median_value = 0; ordered_start = 0; for(i=0;i<num_data;i++){ if(bin_ptr >= partition){ break; } if(id_distance[i].value <= parted_values[bin_ptr+1]){ total_bin_count++; bins[bin_ptr]++; }else{ if(bins[bin_ptr] > median_count){ median_count = bins[bin_ptr]; median_value = parted_values[bin_ptr]; ordered_start = total_bin_count - bins[bin_ptr]; } bin_ptr++; } } //printf(":::%d:::\n",num_data); //printf(":::%d:::\n",num_data - total_bin_count); bins[bin_ptr-1] = (num_data - total_bin_count); /* test for(i=0;i<partition;i++){ fprintf(stderr,"%f\n",parted_values[i]); fprintf(stderr," %d\n",bins[i]); tmp = tmp + bins[i]; fprintf(stderr," %d\n",tmp); } fprintf(stderr,"start:%d:\n",ordered_start); */ /* *) */ }else if(strcmp(coordinate_option,"Through") == 0){ num_clst = num_data; out_matrix = data_matrix; }else if(strncmp(coordinate_option,"Grid=",5) == 0){ sscanf(coordinate_option,"Grid=%s",dim_string); grid_tensor_rank = dim_data; grid_tensor_dim = get_tensor_dimANDrank(grid_tensor_rank,dim_string); tensor_position_matrix = make_outer_tensor_position_matrix(dim_data,&grid_tensor_elms,grid_tensor_dim); num_clst = grid_tensor_elms; if((tensor_vars = malloc((size_t)sizeof(float*)*grid_tensor_rank)) == NULL){ printf("failed : 
malloc() at %ld byte.\n",(long int)sizeof(float*)*grid_tensor_rank); exit(1); } for(i=0;i<grid_tensor_rank;i++){ tensor_vars[i] = make_min_to_max_list(grid_tensor_dim[i],dim_min_and_max_table[0][i],dim_min_and_max_table[1][i]); } out_matrix = f_alloc_mat(grid_tensor_elms,grid_tensor_rank); for(i=0;i<grid_tensor_elms;i++){ for(j=0;j<dim_data;j++){ out_matrix[i][j] = tensor_vars[j][tensor_position_matrix[i][j]]; } } }else if(strncmp(coordinate_option,"Axis-mean=",10) == 0){ int axis_str_len = 0; int axis_list_len = 0; int *axis_list = NULL; double *axis_v_list = NULL; sscanf(coordinate_option,"Axis-mean=%s",axis_str); axis_str_len = strlen(axis_str); for(i=0;i<axis_str_len;i++){ if(axis_str[i] == ','){ axis_list_len++; } } axis_list_len++; axis_list = i_alloc_vec(axis_list_len); axis_v_list = d_calloc_vec(axis_list_len); for(i=0;i<axis_str_len;i++){ if(axis_str[i] == ','){ tmp_word[tmp_ptr_word] = '\0'; sscanf(tmp_word,"%d",&axis_list[tmp_ptr_arr]); tmp_ptr_arr++; tmp_ptr_word = 0; }else{ tmp_word[tmp_ptr_word] = axis_str[i]; tmp_ptr_word++; } } tmp_word[tmp_ptr_word] = '\0'; sscanf(tmp_word,"%d",&axis_list[tmp_ptr_arr]); tmp_ptr_arr++; tmp_ptr_word = 0; /* num_clst */ num_clst = axis_list_len; /* out_matrix alloc */ out_matrix = f_calloc_mat(num_clst,dim_data); /* calculation */ for(i=0;i<num_data;i++){ for(j=0;j<axis_list_len;j++){ axis_v_list[j] += data_matrix[i][axis_list[j]]; } } for(j=0;j<axis_list_len;j++){ axis_v_list[j] = axis_v_list[j]/num_data; out_matrix[j][axis_list[j]] = (float)axis_v_list[j]; } }else if(strncmp(coordinate_option,"aXis-intensive=",15) == 0){ /* declear and allocation */ float axis_ratio = 1; struct st_ax_and_mean *ax_mean; if(((ax_mean = malloc(sizeof(struct st_ax_and_mean) * dim_data)) == NULL)){ fprintf(stderr, "[E] failed: malloc().\n"); exit(0); } out_matrix = f_calloc_mat(dim_data,dim_data); /* ratio */ sscanf(coordinate_option,"aXis-intensive=%f",&axis_ratio); mat_TO_st_ax_and_mean(dim_data, num_data, data_matrix, ax_mean); 
qsort(ax_mean,dim_data,sizeof(struct st_ax_and_mean),(int(*)(const void*, const void*))cmp_ax_and_mean); for(j=0;j<dim_data;j++){ out_matrix[j][ax_mean[j].ax] = axis_ratio * ax_mean[j].mean; } }else{ options(); exit(1); } /* *) */ /* (* open file and write shake_dim_min_to_max_matrix */ ofp = fopen(cluster_file,"w"); if(ofp == NULL){ perror(cluster_file); exit(1); } if(ordered_output == 0){ fprintf(ofp,"%d %d\n",dim_data,num_clst); for(j=0;j<num_clst;j++){ for(i=0;i<dim_data;i++){ fprintf(ofp,"%f ",out_matrix[j][i]); } fprintf(ofp,"\n"); } fclose(ofp); }else{ fprintf(ofp,"%d %d\n",dim_data,num_clst); for(j=ordered_start;j<num_clst+ordered_start;j++){ for(i=0;i<dim_data;i++){ fprintf(ofp,"%f ",out_matrix[id_distance[j].pos][i]); } fprintf(ofp,"\n"); } fclose(ofp); } /* *) */ }else{ message(); usage(); } return(0); }