/**
 * Score every (x triple, y triple) pairing against the hand and
 * nearest-approach filters and record, per y triple, the last pairing
 * that passes.
 *
 * @param X_rep, Y_rep     representations handed to same_hand_triple() and
 *                         distance_of_nearest_approach()
 * @param x_triple_array   x_triple_cnt rows of three indices into X_rep
 * @param y_triple_array   y_triple_cnt rows of three indices into Y_rep
 * @param heap             currently not touched by this function
 * @return 0 on success, 1 if the scratch array cannot be allocated
 *
 * NOTE(review): the scratch triple_array is freed before returning and
 * nothing is pushed onto `heap`, so as written the results are discarded.
 * Confirm whether the heap insertion is still to be implemented.
 */
int insert_triple_to_heap(Representation* X_rep, Representation* Y_rep, int ** x_triple_array,
        int ** y_triple_array, int x_triple_cnt, int y_triple_cnt, PriorityQueue * heap) {

    double cutoff_rmsd = 3.0; /* <<<<<<<<<<<<<<<<< hardcoded */
    double q_init[4] = {0.0}; /* placeholder quaternion stored with each hit */
    double rmsd;
    int x_triple[3];
    int y_triple[3];
    int i, j, m;
    Triple * triple_array;

    (void) heap;

    /* nothing to pair up */
    if (x_triple_cnt <= 0 || y_triple_cnt <= 0) return 0;

    triple_array = malloc(y_triple_cnt * sizeof *triple_array);
    if (!triple_array) return 1;

    /* 11 marks a slot for which no acceptable pairing has been found */
    for (m = 0; m < y_triple_cnt; ++m) triple_array[m].rmsd = 11;

    /*
     * The input arrays are only read, so they are used directly; the
     * earlier private copies were never freed and leaked on every call.
     */
    for (i = 0; i < x_triple_cnt; ++i) {

        x_triple[0] = x_triple_array[i][0];
        x_triple[1] = x_triple_array[i][1];
        x_triple[2] = x_triple_array[i][2];

        for (j = 0; j < y_triple_cnt; ++j) {

            y_triple[0] = y_triple_array[j][0];
            y_triple[1] = y_triple_array[j][1];
            y_triple[2] = y_triple_array[j][2];

            if (!same_hand_triple(X_rep, x_triple, Y_rep, y_triple, 3)) continue;
            /* a non-zero return is treated as "no usable rmsd" */
            if (distance_of_nearest_approach(X_rep, x_triple, Y_rep, y_triple, 3, &rmsd)) continue;
            if (rmsd > cutoff_rmsd) continue;

            triple_array[j].rmsd = rmsd;
            memcpy(&triple_array[j].quat, &q_init, sizeof q_init);
            /*
             * The sources are int[3]; copying 3 * sizeof(double) bytes read
             * past their end.  Assumes triple_x / triple_y hold three ints
             * -- TODO confirm against the Triple definition.
             */
            memcpy(&triple_array[j].triple_x, &x_triple, sizeof x_triple);
            memcpy(&triple_array[j].triple_y, &y_triple, sizeof y_triple);
        }
    }

    free(triple_array);
    return 0;
}
/**
 * Allocate the eight (size x 3) index tables of a Triples_array, in the
 * order hhh, hhs, hsh, hss, shh, ssh, shs, sss.
 *
 * Allocation stops at the first intmatrix() call that returns a null
 * pointer; tables allocated before that point are left in place.
 *
 * @return 1 when all eight tables were allocated, 0 otherwise
 */
int init_triples_array(Triples_array * triples_array, int size){

    /* && short-circuits, so no further table is requested after a failure */
    return (triples_array->hhh_array = intmatrix(size, 3))
        && (triples_array->hhs_array = intmatrix(size, 3))
        && (triples_array->hsh_array = intmatrix(size, 3))
        && (triples_array->hss_array = intmatrix(size, 3))
        && (triples_array->shh_array = intmatrix(size, 3))
        && (triples_array->ssh_array = intmatrix(size, 3))
        && (triples_array->shs_array = intmatrix(size, 3))
        && (triples_array->sss_array = intmatrix(size, 3));
}
/**
 * Prepare an alignment for scoring: run the exon, gap and
 * protected-position passes, allocate the pairwise sequence tables and
 * fill them through seq_pw_dist().
 *
 * @return 1 if any of the four tables cannot be allocated, otherwise
 *         whatever seq_pw_dist() returns
 */
int process_almt (Options *options, Alignment *alignment) {

    int process_exons (Options *options, Alignment * alignment);
    int protected_positions (Options *options, Alignment * alignment);
    int no_seqs;

    /* store the position of exons, and replace them with gaps in the alignment */
    process_exons (options, alignment);
    /* gaps */
    count_gaps (options, alignment);
    /* protected positions */
    protected_positions (options, alignment);

    /* all four tables are square in the number of sequences */
    no_seqs = alignment->number_of_seqs;

    /* allocate space for various indicators of sequence similarity */
    if ( ! (alignment->seq_dist        = dmatrix   (no_seqs, no_seqs)) ) return 1;
    if ( ! (alignment->aligned_sites   = intmatrix (no_seqs, no_seqs)) ) return 1;
    if ( ! (alignment->identical_sites = intmatrix (no_seqs, no_seqs)) ) return 1;
    if ( ! (alignment->similar_sites   = intmatrix (no_seqs, no_seqs)) ) return 1;

    return seq_pw_dist (alignment);
}
/**
 * Per-group column entropy of an alignment.
 *
 * For every column and every group, counts how often each character code
 * occurs among the group's sequences and stores
 *     -sum_c p(c) * log(p(c))      (natural logarithm)
 * in score[group][col].
 *
 * @param alignment   supplies length, number_of_seqs, no_groups,
 *                    sequence[][] and belongs_to_group[]
 * @param similar_to  optional map applied to each character code before
 *                    counting; may be NULL
 * @param score       output, indexed [group][column]
 * @return 0 on success, 1 on allocation failure
 *
 * 'X' and 'x' are not counted.  Character codes outside [0, ASCII), before
 * or after the similar_to mapping, are skipped as well: they would index
 * outside the ASCII-wide frequency rows.
 */
int in_group_entropy ( Alignment * alignment, int * similar_to, double **score){

    int group, col, seq, ctr, aa;
    int **freq, *norm; /* ASCII == 128, ASCII size */
    double p, entropy;

    if ( ! (freq=intmatrix(alignment->no_groups, ASCII)) ) return 1;
    if ( ! (norm=emalloc(alignment->no_groups*sizeof(int))) ) {
        /* do not leak the frequency table on the error path */
        free_imatrix (freq);
        return 1;
    }

    for (col = 0; col < alignment->length; col++){

        /* find frequencies */
        for (group=0; group< alignment->no_groups; group++) {
            memset (freq[group], 0, ASCII*sizeof(int));
            norm[group] = 0;
        }
        for (seq=0; seq< alignment->number_of_seqs; seq++){
            aa = (int) alignment->sequence[seq][col];
            if ( aa == 'X' || aa == 'x' ) continue;
            /* plain char may be signed; guard both table lookups */
            if ( aa < 0 || aa >= ASCII ) continue;
            if (similar_to) {
                aa = similar_to[aa];
                if ( aa < 0 || aa >= ASCII ) continue;
            }
            group = alignment->belongs_to_group[seq];
            freq [group][aa]++;
            norm [group] ++;
        }

        /* find entropy */
        for (group=0; group< alignment->no_groups; group++) {
            entropy = 0.0;
            for ( ctr=0; ctr < ASCII; ctr++) {
                /* a non-zero count implies norm[group] > 0 */
                if ( freq[group][ctr] ) {
                    p = (double)freq[group][ctr]/norm[group];
                    entropy -= p*log(p);
                }
            }
            score[group][col] = entropy;
        }
    }

    free_imatrix (freq);
    free (norm);
    return 0;
}
/**
 * Clustering z-score for each coverage level.
 *
 * For every leading non-zero entry int_cvg[ctr] (at most protein->length
 * of them) the residues with res_rank <= int_cvg[ctr] are selected, their
 * cluster weight is computed with cluster_score(), and that weight is
 * compared to the average and standard deviation reported by
 * std_dev_over_S().  The result is stored in clustering_score[ctr].
 *
 * @param protein           supplies length and sequence
 * @param res_rank          rank of each residue, protein->length entries
 * @param int_cvg           coverage levels; a zero entry ends the list
 * @param clustering_score  output, one z-score per processed coverage level
 * @return 0 on success, 1 if a work array cannot be allocated
 */
int clustering ( Protein *protein, int * res_rank, int * int_cvg, double *clustering_score){

    int ctr, pos;
    int L = protein->length;
    int first;
    int *selection;
    int ** adj_matrix;
    double weight, avg, std_dev, z;
    double cutoff_dist = CUTOFF_DIST;
    int cluster_score (int no_of_res, int *seq, int ** adj_matrix,double *score);
    int std_dev_over_S (int L, int M, int ** adj_matrix, double *avg, double * std_dev, int first);

    /* both allocators are checked for NULL elsewhere in this file */
    selection = (int *) emalloc (protein->length*sizeof(int));
    if ( !selection ) return 1;
    adj_matrix = intmatrix( L,L);
    if ( !adj_matrix ) {
        free (selection);
        return 1;
    }

    determine_adj_matrix (adj_matrix, protein->sequence, L, cutoff_dist);

    for (ctr=0; ctr < L && int_cvg[ctr]; ctr++ ) {

        /* turn coverage into selection array */
        for (pos = 0; pos < L; pos ++ ) {
            selection[pos] = ( res_rank[pos] <= int_cvg[ctr] );
        }

        /* find weight */
        cluster_score (L, selection, adj_matrix, &weight);

        /* find avg and stddev in the set of random picks;
           the last argument is non-zero only on the first pass */
        first = !ctr;
        std_dev_over_S (L, int_cvg[ctr], adj_matrix, &avg, &std_dev, first);

        /* evaluate and store the z-score; a (near) zero spread yields 0 */
        z = (std_dev>1.e-5) ? (weight - avg)/std_dev : 0.0;
        clustering_score [ctr] = z;
    }

    free_matrix ((void **)adj_matrix);
    free (selection);
    return 0;
}
/**
 * Write "<outname>.qry_specs": for every alignment position, the tree nodes
 * whose score exceeds `cutoff`, followed by a per-node summary.
 *
 * For each position the nodes 1 .. no_seqs-1 with score[pos][node] > cutoff
 * are collected.  When one collected node precedes another according to
 * is_precedent, only the preceding (ancestor) node is kept.  A position is
 * labelled "discr" when more than one node survives and "det" otherwise.
 * Surviving nodes are flagged via ->marked and remember the (1-based)
 * positions they were reported for; the second part of the file lists those
 * positions and the leaves of every marked inner node.
 *
 * @param optin         options; only outname is read
 * @param protein       unused
 * @param alignment     supplies number_of_seqs and length
 * @param almt2protein  unused
 * @param leaf          node array; inner nodes start at leaf + no_seqs
 * @param score         indexed [position][node id]
 * @param cutoff        score threshold
 * @param is_precedent  indexed [node id][node id]
 * @return 0 on success, 1 if the output file cannot be opened or closed;
 *         the process exits on allocation failure or an inconsistent
 *         node count
 *
 * NOTE(review): the body used to read `options->outname`, a name that is
 * not declared in this function, while the Options parameter went unused.
 * It now reads the parameter -- confirm callers pass a valid pointer.
 */
int output_specs_for_refseq ( Options * optin, Protein * protein, Alignment * alignment,
        int * almt2protein, Node * leaf, double ** score, double cutoff , int ** is_precedent) {

    int node_id, pos, no_seqs = alignment->number_of_seqs;
    int ctr, number_of_nodes_above_cutoff;
    int ctr1, ctr2, node_id_1, node_id_2;
    int node_ctr;
    int node_pos_ctr, ** node_related_position;
    Node * node_ptr, * first_inner_node;
    int * above_cutoff = NULL;
    char filename[BUFFLEN];
    FILE * fptr;
    int print_leaves (FILE * fptr, Node * node);

    (void) protein;
    (void) almt2protein;

    if ( ! ( above_cutoff = emalloc (no_seqs*sizeof(int) ) ) ) exit (1);
    /* row = node id; column 0 is used as a counter, so the table is
       presumably zero-filled by intmatrix -- TODO confirm */
    if ( ! ( node_related_position = intmatrix ( no_seqs, alignment->length+1) ) ) exit (1);

    /* bounded formatting: outname is not guaranteed to fit the buffer */
    snprintf (filename, sizeof filename, "%s.qry_specs", optin->outname);
    fptr = efopen (filename, "w");
    if ( !fptr) {
        free (above_cutoff);
        free_matrix ( (void **) node_related_position);
        return 1;
    }

    for ( pos=0; pos < alignment->length; pos++) {

        /* collect the nodes scoring above the cutoff at this position */
        ctr = 0;
        for ( node_id= 1; node_id < no_seqs; node_id++ ) {
            if ( score[pos][node_id] > cutoff ){
                above_cutoff [ctr] = node_id;
                ctr ++;
            }
        }
        if ( !ctr ) continue;

        /* -1 terminates the list; a 0 entry means "removed" */
        above_cutoff [ctr] = -1;
        number_of_nodes_above_cutoff = ctr;

        for (ctr1=0; above_cutoff [ctr1] >= 0; ctr1++ ) {
            node_id_1 = above_cutoff [ctr1];
            if ( ! node_id_1 ) continue;
            for (ctr2 = ctr1+1; above_cutoff [ctr2] >= 0 ; ctr2++ ) {
                node_id_2 = above_cutoff [ctr2];
                if ( ! node_id_2 ) continue;
                /* go for the highest z */
                /* if ( is_precedent[node_id_1][node_id_2] ||
                        is_precedent[node_id_2][node_id_1] ) {
                       number_of_nodes_above_cutoff --;
                       if ( score[pos][node_id_2] < score[pos][node_id_1] ) {
                           above_cutoff [ctr2] = 0;
                       } else {
                           above_cutoff [ctr1] = 0;
                           break;
                       }
                   }*/
                /* alternatively, go for the ancestor */
                if ( is_precedent[node_id_1][node_id_2] ) {
                    number_of_nodes_above_cutoff --;
                    above_cutoff [ctr2] = 0;
                } else if ( is_precedent[node_id_2][node_id_1] ) {
                    number_of_nodes_above_cutoff --;
                    above_cutoff [ctr1] = 0;
                    break;
                }
            }
        }

        /* at least one node must survive the pruning */
        if ( number_of_nodes_above_cutoff <= 0) {
            printf (" %d foul\n", pos);
            for (ctr1=0; above_cutoff [ctr1] >= 0; ctr1++ ) {
                printf (" ** %d \n", above_cutoff [ctr1] );
            }
            exit (1);
        }

        if ( number_of_nodes_above_cutoff > 1 ) {
            fprintf (fptr, "%4d %5s:", pos + 1, "discr" );
        } else {
            fprintf (fptr, "%4d %5s:", pos + 1, "det" );
        }

        for (ctr1=0; above_cutoff [ctr1] >= 0; ctr1++ ) {
            node_id_1 = above_cutoff [ctr1];
            if ( ! node_id_1 ) continue;
            fprintf (fptr, " %4d", node_id_1);
            /* node ids map onto the node array counted from its end */
            node_ctr = 2*no_seqs-1 - node_id_1;
            (leaf+node_ctr) -> marked = 1;
            /* use the zeroth pos as a counter */
            node_related_position[node_id_1][0] ++;
            node_pos_ctr = node_related_position[node_id_1][0];
            node_related_position[node_id_1][ node_pos_ctr ] = pos + 1;
        }
        fprintf (fptr, "\n");
    }

    fprintf ( fptr, "\n\n");
    fprintf ( fptr, "nodes:\n=============\n\n");

    first_inner_node = leaf + no_seqs;
    for ( node_ctr = 0; node_ctr < no_seqs-1; node_ctr ++ ) {
        node_ptr = first_inner_node + node_ctr;
        if ( ! node_ptr->marked ) continue;

        fprintf ( fptr, "\n\t node %4d\n", node_ptr->id );
        fprintf ( fptr, "\t positions:");
        for ( node_pos_ctr=1;
              node_pos_ctr<= node_related_position[node_ptr->id][0];
              node_pos_ctr++ ) {
            fprintf ( fptr, " %4d", node_related_position [node_ptr->id][ node_pos_ctr ]);
        }
        fprintf (fptr, "\n");
        fprintf ( fptr, "\t sequences\n");
        print_leaves ( fptr, node_ptr);
    }
    fprintf (fptr, "\n");

    free (above_cutoff);
    free_matrix ( (void **) node_related_position);

    /* the stream was previously left open; closing it also reports
       write errors that were deferred until the final flush */
    if ( fclose (fptr) != 0 ) return 1;

    return 0;
}
int main ( int argc, char * argv[]) { Options options; Protein protein; Alignment * alignment = NULL; int retval; int almtctr1, almtctr2; double **score = NULL, corr, pctg_gaps; double **clustering_score = NULL; double *area, *distance; int **rank_order= NULL,**res_rank=NULL,**int_cvg=NULL ; int ** correlated = NULL, **almt2prot = NULL, **prot2almt = NULL; /* command file is required */ if ( argc < 2 ) { fprintf ( stderr, "Usage: %s <command file>.\n", argv[0]); exit (0); } retval = read_cmd_file ( argv[1], &options); if (retval) exit(retval); retval = logger (&options, INTRO, ""); if (retval) exit(retval); /*******************************************/ /* */ /* PDB input */ /* */ /*******************************************/ if ( ! options.pdbname[0]) { fprintf (stderr, "%s cannot work without structure (cmd file was %s).\n", argv[0], argv[1]); exit (1); } else { /* warn if no chain given */ if ( !options.chain) { retval = logger (&options, WARN, "No chain specified. Using the first one."); if ( retval) exit (1); } if (retval) exit(retval); /* read in the structure */ retval = read_pdb (options.pdbname, &protein, options.chain); if (retval) exit(retval); } /*******************************************/ /* */ /* alignment scoring */ /* */ /*******************************************/ if ( ! ( alignment = emalloc ( options.no_of_alignments*sizeof(Alignment)) )) return 1; if ( ! ( score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1; if ( ! ( rank_order = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1; if ( ! ( clustering_score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1; if ( ! ( res_rank = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1; if ( ! ( int_cvg = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1; if ( ! ( almt2prot = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1; if ( ! ( prot2almt = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1; if ( ! 
( area = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1; if ( ! ( distance = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1; printf ( "\t%8s %20s %8s %8s %8s \n", "almt#", "name ", "<dist to qry>", "%gaps", "area"); for ( almtctr1 = 0; almtctr1 < options.no_of_alignments; almtctr1++) { /* read in the alignment */ retval = read_clustalw (options.almtname[almtctr1], alignment + almtctr1); if (retval) exit(retval); /* pairwise distances btw the seqs */ retval = seq_pw_dist (alignment+almtctr1); if ( retval) return retval; /* average dist to the query in this alignment: */ distance[almtctr1] = avg_dist_to_special (&options, alignment + almtctr1); /* percentage of gaps in the alignment: */ pctg_gaps = (double) alignment->total_gaps/ ( (alignment+almtctr1)->length*(alignment+almtctr1)->number_of_seqs); /* make the residue scoring array */ score[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(double)); /* fill in the score array */ scoring (&options, alignment+almtctr1, score[almtctr1]); /* translate the scoring into rank order */ rank_order[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(int)); score2rank (score[almtctr1], rank_order[almtctr1], alignment[almtctr1].length); /* mapping between the protein and the alignment almtctr1 */ if ( ! (almt2prot[almtctr1] = (int *) emalloc (alignment[almtctr1].length*sizeof(int))) )exit (1); if ( ! (prot2almt[almtctr1] = (int *) emalloc (protein.length*sizeof(int))) )exit (1); retval = struct_almt_mapping (&protein, alignment+almtctr1, options.query, prot2almt[almtctr1], almt2prot[almtctr1]); if (retval) exit(retval); /* find coverage info implied by the scoring array */ if ( ! (res_rank[almtctr1] = (int*) emalloc (protein.length*sizeof(int))) ) exit (1); if ( ! 
(int_cvg[almtctr1] = (int*) emalloc (protein.length*sizeof(int))) ) exit (1); coverage ( &protein, almt2prot[almtctr1], score[almtctr1], alignment[almtctr1].length, res_rank[almtctr1], int_cvg[almtctr1] ); /*clustering score*/ clustering_score[almtctr1] = (double*) emalloc (protein.length*sizeof(double)); if (!clustering_score[almtctr1]) exit(retval); clustering ( &protein, res_rank[almtctr1], int_cvg[almtctr1], clustering_score[almtctr1]); /* cumulative clustering score*/ area[almtctr1] = area_over_coverage (int_cvg[almtctr1], clustering_score[almtctr1], protein.length); printf ( "\t %4d %20s %8.3lf %8.3lf %8.3lf \n", almtctr1, options.almtname[almtctr1], distance[almtctr1], pctg_gaps, area[almtctr1]); } /* find the table of correlations */ if ( ! (correlated = intmatrix ( options.no_of_alignments, options.no_of_alignments) ) ) return 1; for ( almtctr1 = 0; almtctr1 < options.no_of_alignments -1; almtctr1++) { correlated[almtctr1][almtctr1] = 1; for ( almtctr2 = almtctr1+1; almtctr2 < options.no_of_alignments; almtctr2++) { if ( alignment[almtctr1].length != alignment[almtctr2].length ) { fprintf ( stderr, "Error alignments in the files %s and %s ", options.almtname[almtctr1], options.almtname[almtctr2]); fprintf ( stderr, "seem to be of unequal length: %d and %d.\n", alignment[almtctr1].length , alignment[almtctr2].length); return 1; } corr = spearman ( rank_order[almtctr1], rank_order[almtctr2], alignment[almtctr1].length ); printf ( " %3d %3d %8.4lf\n", almtctr1, almtctr2, corr); correlated[almtctr1][almtctr2] = ( corr > 0.9 ); } } /* find corelated clusters (of sequence selections)*/ { int *cluster_count_per_size; int no_of_clusters; int max_size, secnd_max_size , ** cluster; int size = options.no_of_alignments; int i,j; double dist, ar, max_area, dist_at_max_area; double min_dist_at_max_area, min_dist, max_area_at_min_dist; int almt_no, min_dist_almt; int cluster_counter (int no_of_things, int *neighbors[], int cluster_count_per_size[], int * no_of_clusters, 
int * max_size, int * secnd_max_size , int * cluster[]); if ( ! ( cluster_count_per_size = emalloc (size*sizeof(int)))) return 1; if ( ! (cluster = intmatrix ( size+1, size+1) ) ) return 1; retval = cluster_counter (size, correlated, cluster_count_per_size, &no_of_clusters, & max_size, &secnd_max_size , cluster); if ( retval ) return 1; printf ( "number of clusters: %d \n", no_of_clusters); for (i=0; i<=size; i++ ) { if ( ! cluster[i][0] ) continue; if ( !i ) { printf ( "\t isolated:\n"); } else { printf ("\t cluster size: %3d \n", cluster[i][0]); } for (j=1; j <= cluster[i][0]; j++ ) { printf ( "%3d ", cluster[i][j] ); } printf ( "\n"); } /* which cluster is the closest to the singled out sequence ("special") */ min_dist_at_max_area = dist_at_max_area = 10; max_area_at_min_dist = min_dist = -10; min_dist_almt = -1; for (i=0; i<=size; i++ ) { if ( ! cluster[i][0] ) continue; max_area = -100; almt_no = dist_at_max_area = -1; for (j=1; j <= cluster[i][0]; j++ ) { dist = distance[cluster[i][j]] ; ar = area[cluster[i][j]] ; if ( max_area < ar ) { max_area = ar; dist_at_max_area = dist; almt_no = cluster[i][j]; } } if ( almt_no < 0 ) { fprintf ( stderr, "Error selecting the alignment (1)\n"); exit (1); } if ( min_dist_at_max_area > dist_at_max_area ) { min_dist = dist_at_max_area; max_area_at_min_dist = max_area; min_dist_almt = almt_no; } } if ( min_dist_almt < 0 ) { fprintf ( stderr, "Error selecting the alignment (2)\n"); exit (1); } printf ( "choosing alignment %d %s (distance: %5.3f area: %6.3f)\n", min_dist_almt, options.almtname[min_dist_almt], min_dist, max_area_at_min_dist); free (cluster_count_per_size); free_matrix ( (void **) cluster); } free (score); logger ( &options, NOTE, ""); return 0; }
/*
 * output_specs_score: as compiled this function does nothing and returns 0.
 *
 * Everything between "# if 0" and the final "# endif" is removed by the
 * preprocessor, so none of the parameters is used.  The disabled body is
 * kept below; read as code it would:
 *   - write "<outname>.specs": for every node id in 2 .. no_seqs-1 that has
 *     at least one related position, a table of those positions;
 *   - write "<outname>.leaves": the leaves of each such node.
 * A position is related to a node when the column is not too gapped,
 * overlap[pos][node] < MAX_OVERLAP and probability[pos][node] <= MAX_PROB.
 *
 * NOTE(review): if the body is ever re-enabled, `print` is read without
 * having been cleared here -- confirm emalloc zero-fills -- and `related`
 * and `print` are never released.
 */
int output_specs_score ( Options *options, Protein * protein, Alignment * alignment, int *almt2prot, Node * leaf, double ** score, double ** complement_score, double ** p_value, double **probability, double **overlap, double cutoff, int ** is_precedent) {
# if 0
    char *base_filename = options ->outname;
    int node_id, pos, no_seqs = alignment->number_of_seqs;
    int node_id_2;
    int node_ctr,* print, first;
    int ** related;
    /* columns with more gaps than this are ignored */
    int int_max_gaps = MAX_GAPS*no_seqs;
    Node *root;
    int refseq_number, refseq_2_number = -1;
    char filename[BUFFLEN];
    char in_qry2;
    FILE * fptr;
    int print_leaves (FILE * fptr, Node * node);
    int recursive_cleanup (Node *node, int * related, double * probability);
    /* related[pos][node_id] != 0: position pos is reported for node node_id */
    if ( ! ( related = intmatrix (alignment->length+1, no_seqs ) ) ) exit (1);
    if ( ! ( print = emalloc (no_seqs*sizeof(int) ) ) ) exit (1);
    printf ("in output\n");
#if 1
    sprintf (filename, "%s.specs", base_filename);
    fptr = efopen (filename, "w");
    if ( !fptr) return 1;
# else
    fptr = stdout;
# endif
    if ( ! options->refseq ) {
        fprintf ( stderr, "Please define refseq.\n");
        exit (0);
    }
    /* locate the refseq */
    refseq_number = find_refseq (alignment, options->refseq);
    if ( options->qry2[0]) refseq_2_number = find_refseq (alignment, options->qry2);
    /* associate nodes and positions */
    for ( node_id = 2; node_id < no_seqs; node_id++ ) {
        /* node ids map onto the node array counted from its end */
        node_ctr = 2*no_seqs -1 - node_id;
        if ( (leaf+node_ctr)-> number_of_leaves < MIN_NO_LEAVES) continue;
        for ( pos=0; pos < alignment->length; pos++) {
            if ( alignment->column_gaps[pos] > int_max_gaps) continue;
            if ( overlap[pos][node_id] < MAX_OVERLAP && probability[pos][node_id] <= MAX_PROB) {
                related[pos][node_id] = 1;
            }
        }
    }
    /* recursive cleanup (so that parents and children do not appear for the same reason) */
    for ( pos=0; pos < alignment->length; pos++) {
        if ( alignment->column_gaps[pos] > int_max_gaps ) continue;
        recursive_cleanup ( root = leaf+2*no_seqs-1 - 1, related[pos], probability[pos] ) ;
    }
    /* which nodes should be printed? */
    for ( node_id = 2; node_id < no_seqs; node_id++ ) {
        for ( pos=0; pos < alignment->length; pos++) {
            if ( alignment->column_gaps[pos] > int_max_gaps ) continue;
            if ( related[pos][node_id] ) {
                print[node_id] = 1;
                break;
            }
        }
    }
    /* one table per printed node */
    for ( node_id = 2; node_id < no_seqs; node_id++ ) {
        node_ctr = 2*no_seqs-1 - node_id;
        if ( ! print[node_id] ) continue;
        fprintf ( fptr, "\n\nnode %d\n=============\n", node_id);
        fprintf ( fptr, " %4s %4s %4s %4s %5s %5s %5s %5s other nodes\n", "pos", "pdbid", "qry1", "qry2", "entr", "c_entr", "prob", "ovlp");
        for ( pos=0; pos < alignment->length; pos++) {
            if ( ! related[pos][node_id] ) continue;
            /* residue of the second query at this position, '-' if none given */
            in_qry2 = (options->qry2[0]) ? alignment->sequence[refseq_2_number][pos]: '-';
            fprintf ( fptr, " %4d %4s %1c %1c %5.1lf %5.1lf %5.1le %5.2lf ", pos+1, protein->sequence[ almt2prot[pos]].pdb_id, alignment->sequence[refseq_number][pos], in_qry2, score[pos][node_id], complement_score[pos][node_id], probability[pos][node_id], overlap[pos][node_id] );
            /* other nodes which have this same pos as discriminant */
            first = 1;
            for ( node_id_2 = 2; node_id_2 < no_seqs; node_id_2 ++ ) {
                if ( node_id_2 == node_id ) continue;
                if ( related[pos][node_id_2] ) {
                    if ( ! first ) {
                        fprintf ( fptr, ",");
                    }
                    fprintf ( fptr, "%d",node_id_2);
                    first = 0;
                }
            }
            fprintf ( fptr, "\n");
        }
    }
    fclose (fptr);
    /* print out leaves */
    sprintf (filename, "%s.leaves", base_filename);
    fptr = efopen (filename, "w");
    if ( !fptr) return 1;
    for ( node_id = 2; node_id < no_seqs; node_id++ ) {
        if ( ! print[node_id] ) continue;
        node_ctr = 2*no_seqs -1 - node_id;
        fprintf ( fptr, "\n\nnode %d\n=============\n", node_id);
        print_leaves ( fptr, leaf+node_ctr);
        fprintf (fptr, "\n");
    }
    fclose (fptr);
# endif
    return 0;
}
/**
 * Exhaustive OpenMP search for the best-matching triples of elements of
 * X_rep and Y_rep.
 *
 * Every ordered triple (i, k, m) of distinct X elements is paired with every
 * increasing triple (j < l < n) of Y elements.  A pairing is kept when the
 * element types agree position by position, all pairwise centre distances
 * (cm) inside each triple are within THRESHOLD, same_hand_triple() accepts
 * it and distance_of_nearest_approach() yields an rmsd of at most 3.0.
 * Each thread keeps its own sorted list of the TOP_RMSD best pairings,
 * publishes it into its slice of the shared arrays, and the shared arrays
 * are then ordered by sortTriplets().
 *
 * @param no_top_rmsd    ignored: overwritten with TOP_RMSD on entry
 * @param best_rmsd      out: TOP_RMSD rmsd values
 * @param best_triple_x  out: TOP_RMSD x 3 indices into X_rep
 * @param best_triple_y  out: TOP_RMSD x 3 indices into Y_rep
 * @param best_quat      not written (the copy is disabled below)
 * @return 0 on success, 1 if the shared arrays cannot be allocated
 *
 * The block copies (memmove/memcpy across several rows at once) assume that
 * dmatrix()/intmatrix() store their rows contiguously -- TODO confirm.
 */
int find_best_triples_exhaustive_parallel(Representation* X_rep, Representation* Y_rep, int no_top_rmsd,
        double * best_rmsd, int ** best_triple_x, int ** best_triple_y, double **best_quat) {

    /* initialization of global array of values */
    no_top_rmsd = TOP_RMSD;

    /* one slice of no_top_rmsd entries per thread */
    double ** best_quat_array = dmatrix(no_top_rmsd * NUM_THREADS, 4);
    int ** best_triple_x_array = intmatrix(no_top_rmsd * NUM_THREADS, 3);
    int ** best_triple_y_array = intmatrix(no_top_rmsd * NUM_THREADS, 3);
    double * best_rmsd_array = (double *) malloc(no_top_rmsd * NUM_THREADS * sizeof (double));

    if (!best_quat_array || !best_triple_x_array || !best_triple_y_array || !best_rmsd_array) {
        if (best_quat_array) free_dmatrix(best_quat_array);
        if (best_triple_x_array) free_imatrix(best_triple_x_array);
        if (best_triple_y_array) free_imatrix(best_triple_y_array);
        free(best_rmsd_array);
        return 1;
    }

    int cnt;
    for (cnt = 0; cnt < NUM_THREADS * no_top_rmsd; ++cnt) {
        /* sentinel: worse than any accepted rmsd, no triple stored */
        best_rmsd_array[cnt] = BAD_RMSD + 1;
        best_triple_x_array[cnt][0] = -1;
    }

    omp_set_num_threads(NUM_THREADS);

#pragma omp parallel
    {
        int top_ctr, i, j, k, l, n, m;
        int myid = omp_get_thread_num();

        /* per-thread top list; NOTE(review): these allocations are unchecked */
        double ** best_quat_local = dmatrix(no_top_rmsd, 4);
        int ** best_triple_x_local = intmatrix(no_top_rmsd, 3);
        int ** best_triple_y_local = intmatrix(no_top_rmsd, 3);
        double * best_rmsd_local = (double *) malloc(no_top_rmsd * sizeof (double));

        int * x_type = X_rep->full_type;
        int NX = X_rep->N_full;
        int * y_type = Y_rep->full_type;
        int NY = Y_rep->N_full;
        double ** cmx = X_rep->cm;
        double ** cmy = Y_rep->cm;

        int x_triple[3], y_triple[3];
        int chunk;
        double cutoff_rmsd = 3.0; /* <<<<<<<<<<<<<<<<< hardcoded */
        double rmsd;
        /* placeholder quaternion stored with each hit; its declaration had
           been commented out although the memcpy below still reads it */
        double q_init[4] = {0.0};

        /***************************************/
        /* find reasonable triples of SSEs     */
        /* that correspond in type             */
        /* and can be mapped onto each other   */
        /***************************************/
        for (top_ctr = 0; top_ctr < no_top_rmsd; top_ctr++) {
            best_rmsd_local[top_ctr] = BAD_RMSD + 1;
            best_triple_x_local[top_ctr][0] = -1;
        }

        /*
         * Exhaustive search through a 6D space - ugly code
         * Parallelization: the outermost loop is shared among the threads.
         */
#pragma omp for
        for (i = 0; i < NX; ++i) {
            for (j = 0; j < NY - 2; ++j) {
                if (x_type[i] != y_type[j]) continue;

                for (k = 0; k < NX; ++k) {
                    if (k == i) continue;
                    if (two_point_distance(cmx[i], cmx[k]) > THRESHOLD) continue;

                    for (l = j + 1; l < NY - 1; ++l) {
                        if (x_type[k] != y_type[l]) continue;
                        if (two_point_distance(cmy[j], cmy[l]) > THRESHOLD) continue;

                        for (m = 0; m < NX; ++m) {
                            if (m == k || m == i) continue;
                            if (two_point_distance(cmx[i], cmx[m]) > THRESHOLD) continue;
                            if (two_point_distance(cmx[k], cmx[m]) > THRESHOLD) continue;

                            for (n = l + 1; n < NY; ++n) {
                                if (x_type[m] != y_type[n]) continue;
                                if (two_point_distance(cmy[j], cmy[n]) > THRESHOLD) continue;
                                if (two_point_distance(cmy[l], cmy[n]) > THRESHOLD) continue;

                                x_triple[0] = i;
                                y_triple[0] = j;
                                x_triple[1] = k;
                                y_triple[1] = l;
                                x_triple[2] = m;
                                y_triple[2] = n;

                                if (!same_hand_triple(X_rep, x_triple, Y_rep, y_triple, 3)) continue;
                                if (distance_of_nearest_approach(X_rep, x_triple, Y_rep, y_triple, 3, &rmsd)) continue;
                                if (rmsd > cutoff_rmsd) continue;

                                /* insertion of the new values, keeping the arrays sorted */
                                for (top_ctr = 0; top_ctr < no_top_rmsd; top_ctr++) {
                                    if (rmsd <= best_rmsd_local[top_ctr]) {
                                        /* shift the tail down by one slot; the last entry drops out */
                                        chunk = no_top_rmsd - top_ctr - 1;
                                        if (chunk) {
                                            memmove(best_rmsd_local + top_ctr + 1, best_rmsd_local + top_ctr, chunk * sizeof(double));
                                            memmove(best_quat_local[top_ctr + 1], best_quat_local[top_ctr], chunk * 4 * sizeof(double));
                                            memmove(best_triple_x_local[top_ctr + 1], best_triple_x_local[top_ctr], chunk * 3 * sizeof (int));
                                            memmove(best_triple_y_local[top_ctr + 1], best_triple_y_local[top_ctr], chunk * 3 * sizeof (int));
                                        }
                                        best_rmsd_local[top_ctr] = rmsd;
                                        memcpy(best_quat_local[top_ctr], q_init, 4 * sizeof (double));
                                        memcpy(best_triple_x_local[top_ctr], x_triple, 3 * sizeof (int));
                                        memcpy(best_triple_y_local[top_ctr], y_triple, 3 * sizeof (int));
                                        break;
                                    }
                                }
                            }
                        }
                    }
                }
            }

            /*
             * Each thread copies its values to the global arrays in
             * accordance with its thread id.  As found, this sits inside the
             * work-shared loop body, so it is repeated after every i handled
             * by the thread and the last copy is the one that counts.
             * NOTE(review): presumably placed here so that all slices are
             * published before the barrier that ends the loop -- confirm.
             */
            memcpy(*(best_quat_array + myid * no_top_rmsd), *(best_quat_local), no_top_rmsd * 4 * sizeof (double));
            memcpy(*(best_triple_y_array + myid * no_top_rmsd), *(best_triple_y_local), no_top_rmsd * 3 * sizeof (int));
            memcpy(*(best_triple_x_array + myid * no_top_rmsd), *(best_triple_x_local), no_top_rmsd * 3 * sizeof (int));
            memcpy(best_rmsd_array + myid * no_top_rmsd, best_rmsd_local, no_top_rmsd * sizeof (double));
        }

        free_dmatrix(best_quat_local);
        free_imatrix(best_triple_x_local);
        free_imatrix(best_triple_y_local);
        free(best_rmsd_local);

        /*
         * parallel sort of elements of arrays; called by every thread of the
         * team.  NOTE(review): presumably sortTriplets() contains its own
         * work-sharing constructs -- confirm, otherwise this is a data race.
         */
        sortTriplets(best_triple_x_array, best_triple_y_array, best_rmsd_array, best_quat_array, no_top_rmsd);
    }

    /* the quaternion copy is disabled:
       memcpy(*best_quat, *best_quat_array, no_top_rmsd * 4 * sizeof(double)); */
    memcpy(*best_triple_y, *best_triple_y_array, no_top_rmsd * 3 * sizeof (int));
    memcpy(*best_triple_x, *best_triple_x_array, no_top_rmsd * 3 * sizeof (int));
    memcpy(best_rmsd, best_rmsd_array, no_top_rmsd * sizeof (double));

    free_dmatrix(best_quat_array);
    free_imatrix(best_triple_x_array);
    free_imatrix(best_triple_y_array);
    free(best_rmsd_array);

    return 0;
}
/**
 * Build candidate maps between the elements of X_rep and Y_rep.
 *
 * Steps, in order:
 *  1. collect up to TOP_RMSD triples of same-type elements that pass the
 *     handedness and distance-of-nearest-approach filters, kept sorted by
 *     the rmsd returned from opt_quat();
 *  2. for each such triple, build a map anchored on it (find_map), cull it
 *     (cull_by_dna), refine the rotation on the mapped pairs (opt_quat +
 *     monte_carlo), score it and hand it to store_sorted();
 *  3. for the best options.number_maps_cpl maps, look for one other map
 *     that combines with it to a higher total assigned score.
 *
 * @param X_rep, Y_rep  representations to be matched (read through ->full,
 *                      ->full_type, ->N_full and passed to helpers)
 * @param map           array of maps; new maps are written at map[*map_ctr]
 * @param map_max       upper bound for *map_ctr (checked on entry only)
 * @param map_ctr       in/out: index of the next free slot in map
 * @param map_best      out: indices of stored maps, terminated by -1
 * @param best_max      not used by this function
 * @param parent_map    not used by this function
 *
 * @return 0 on success, 1 if an allocation fails, or the non-zero value
 *         returned by monte_carlo().  All work buffers allocated here are
 *         released on every return path.  If *map_ctr >= map_max on entry
 *         the process exits with status 1.
 */
int complement_match (Representation* X_rep, Representation* Y_rep,
                      Map * map, int map_max,
                      int * map_ctr, int * map_best, int best_max, int parent_map) {

    Penalty_parametrization penalty_params; /* for SW */

    double **x    = X_rep->full;
    int * x_type  = X_rep->full_type;
    int NX        = X_rep->N_full;
    double **y    = Y_rep->full;
    int * y_type  = Y_rep->full_type;
    int NY        = Y_rep->N_full;

    double F_effective = 0.0;
    double F_current;
    double q[4] = {0.0}, q_init[4] = {0.0};
    double **x_rotated = NULL;
    double **R = NULL;
    double z_scr = 0.0, *z_best = NULL;
    double avg, avg_sq, stdev;
    double alpha = options.alpha;
    double rmsd, best_rmsd[TOP_RMSD];
    double **best_quat = NULL;
    double cutoff_rmsd = 3.0; /* <<<<<<<<<<<<<<<<< hardcoded */
    int *x_type_fudg = NULL, *y_type_fudg = NULL;
    int *anchor_x = NULL, *anchor_y = NULL, no_anchors;
    int no_top_rmsd = TOP_RMSD, chunk;
    int x_ctr, y_ctr, top_ctr;
    int **best_triple_x = NULL;
    int **best_triple_y = NULL;
    int x_triple[3], y_triple[3];
    int retval, done = 0;
    int best_ctr;
    int i, j;
    int t;
    int smaller;
    int my_map_ctr;
    int stored_new;
    int * x2y, map_unstable;
    int status = 1; /* pessimistic default; set to 0 once everything succeeded */

    int cull_by_dna (Representation * X_rep, int *set_of_directions_x,
                     Representation * Y_rep, int *set_of_directions_y,
                     int set_size, Map *map, double cutoff_rmsd);
    int distance_of_nearest_approach (Representation * X_rep, int *set_of_directions_x,
                                      Representation * Y_rep, int *set_of_directions_y,
                                      int set_size, double * rmsd_ptr);
    int same_hand_triple (Representation * X_rep, int *set_of_directions_x,
                          Representation * Y_rep, int *set_of_directions_y, int set_size);
    int find_map (Penalty_parametrization * params, Representation *X_rep, Representation *Y_rep,
                  double ** R, double alpha, double * F_effective, Map *map,
                  int *anchor_x, int * anchor_y, int anchor_size );
    int find_next_triple (double **X, double **Y, int *x_type, int *y_type,
                          int NX, int NY, int *x_triple, int *y_triple);
    int gradient_descent (int first_call, double alpha,
                          double **x, int * x_type, int NX,
                          double **y, int * y_type, int NY,
                          double *q_best, double *F_best_ptr);
    int monte_carlo (double alpha, double **x, int * x_type, int NX,
                     double **y, int * y_type, int NY,
                     double *q_best, double *F_best_ptr);
    int opt_quat (double ** x, int NX, int *set_of_directions_x,
                  double ** y, int NY, int *set_of_directions_y,
                  int set_size, double * q, double * rmsd);
    int store_sorted (Map * map, int NX, int NY, int *map_best, int map_max,
                      double * z_best, int best_ctr, double z_scr,
                      int my_map_ctr, int *stored);

    map_best[0] = -1; /* it is the end-of-array flag */
    if ( *map_ctr >= map_max ) {
        fprintf (stderr, "Map array undersized.\n");
        exit (1);
    }

    smaller = (NX <= NY) ? NX : NY;

    /* set before the first possible jump to cleanup, which frees them */
    penalty_params.custom_gap_penalty_x = NULL;
    penalty_params.custom_gap_penalty_y = NULL;

    /***********************/
    /* memory allocation   */
    /***********************/
    if ( ! (R = dmatrix (3,3)) )                         goto cleanup;
    if ( ! (x_rotated = dmatrix (NX,3)) )                goto cleanup;
    if ( ! (best_quat = dmatrix (no_top_rmsd,4)) )       goto cleanup;
    if ( ! (best_triple_x = intmatrix (no_top_rmsd,3)) ) goto cleanup;
    if ( ! (best_triple_y = intmatrix (no_top_rmsd,3)) ) goto cleanup;
    if ( ! (z_best = emalloc (NX*NY*sizeof(double))) )   goto cleanup;
    if ( ! (x_type_fudg = emalloc (NX*sizeof(int))) )    goto cleanup;
    if ( ! (y_type_fudg = emalloc (NY*sizeof(int))) )    goto cleanup;
    if ( ! (anchor_x = emalloc (NX*sizeof(int))) )       goto cleanup;
    if ( ! (anchor_y = emalloc (NY*sizeof(int))) )       goto cleanup;

    /***********************/
    /* expected quantities */
    /***********************/
    avg = avg_sq = stdev = 0.0;
    /* moment estimation is switched off here (used to be tied to
       options.postprocess), so avg, avg_sq and stdev stay 0 */
    if (0) {
        if (F_moments (x, x_type, NX, y, y_type, NY, alpha, &avg, &avg_sq, &stdev)) goto cleanup;
    }

    /***********************/
    /* initialization      */
    /***********************/
    best_ctr = 0;
    penalty_params.gap_opening   = options.gap_open;
    penalty_params.gap_extension = options.gap_extend;
    penalty_params.endgap        = options.endgap;
    penalty_params.endgap_special_treatment = options.use_endgap;

    /***************************************/
    /* find reasonable triples of SSEs     */
    /* that correspond in type             */
    /* and can be mapped onto each other   */
    /***************************************/
    for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) {
        best_rmsd[top_ctr] = BAD_RMSD+1; /* marks the slot as empty */
        best_triple_x[top_ctr][0] = -1;
    }

    for (x_ctr=0; x_ctr < NX-2 && !done; x_ctr++) {
        for (y_ctr=0; y_ctr < NY-2 && !done; y_ctr++) {

            if ( y_type[y_ctr] != x_type[x_ctr] ) continue;

            x_triple[0] = x_ctr;
            y_triple[0] = y_ctr;

            if (find_next_triple (x, y, x_type, y_type, NX, NY, x_triple, y_triple) ) {
                continue;
            }
            if ( x_triple[1] < 0 || x_triple[2] < 0 ) continue;
            if ( y_triple[1] < 0 || y_triple[2] < 0 ) continue;

            /* do these three have kind-of similar layout in space? */
            /* is handedness the same? */
            if ( ! same_hand_triple (X_rep, x_triple, Y_rep, y_triple, 3)) continue;

            /* are distances comparable? */
            if (distance_of_nearest_approach (X_rep, x_triple, Y_rep, y_triple, 3, &rmsd)) continue;
            if ( rmsd > cutoff_rmsd) continue;

            /* find q_init that maps the two triples as well as possible */
            if ( opt_quat (x, NX, x_triple, y, NY, y_triple, 3, q_init, &rmsd)) continue;

            /* sorted insertion into the top list; the worst entry falls off.
               The memmove over several rows assumes that dmatrix/intmatrix
               store their rows contiguously -- TODO confirm */
            for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) {
                if ( rmsd <= best_rmsd[top_ctr] ) {
                    chunk = no_top_rmsd - top_ctr - 1;
                    if (chunk) {
                        memmove (best_rmsd+top_ctr+1, best_rmsd+top_ctr,
                                 chunk*sizeof(double));
                        memmove (best_quat[top_ctr+1], best_quat[top_ctr],
                                 chunk*4*sizeof(double));
                        memmove (best_triple_x[top_ctr+1], best_triple_x[top_ctr],
                                 chunk*3*sizeof(int));
                        memmove (best_triple_y[top_ctr+1], best_triple_y[top_ctr],
                                 chunk*3*sizeof(int));
                    }
                    best_rmsd[top_ctr] = rmsd;
                    memcpy (best_quat[top_ctr], q_init, 4*sizeof(double));
                    memcpy (best_triple_x[top_ctr], x_triple, 3*sizeof(int));
                    memcpy (best_triple_y[top_ctr], y_triple, 3*sizeof(int));
                    break;
                }
            }
        }
    }

    /*********************************************/
    /*   main loop                               */
    /*********************************************/
    /* NOTE(review): *map_ctr is compared with map_max only on entry, yet it
       grows inside this loop -- confirm the caller leaves room for up to
       TOP_RMSD additional maps. */
    for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) {

        if ( best_rmsd[top_ctr] > BAD_RMSD ) break; /* first empty slot */

        quat_to_R (best_quat[top_ctr], R);
        rotate (x_rotated, NX, R, x);

        F_current = F (y, y_type, NY, x_rotated, x_type, NX, alpha);

        /* find map which uses the 2 triples as anchors */
        no_anchors = 3;
        find_map (&penalty_params, X_rep, Y_rep, R, alpha, &F_effective,
                  map + (*map_ctr),
                  best_triple_x[top_ctr], best_triple_y[top_ctr], no_anchors);

        /* drop the map if it no longer pairs the anchor triple */
        x2y = ( map + (*map_ctr) )->x2y;
        map_unstable = 0;
        for (t=0; t<3; t++ ) {
            if ( x2y[best_triple_x[top_ctr][t]] != best_triple_y[top_ctr][t] ) {
                map_unstable = 1;
            }
        }
        if ( map_unstable) continue;

        /* dna here is not DNA but "distance of nearest approach" */
        cull_by_dna (X_rep, best_triple_x[top_ctr],
                     Y_rep, best_triple_y[top_ctr], 3,
                     map + (*map_ctr), cutoff_rmsd );

        /* monte that optimizes the aligned vectors only:            */
        /* unmapped elements get types that differ between x and y,  */
        /* mapped ones keep their own type and become anchors        */
        for (i=0; i<NX; i++) {
            x_type_fudg[i] = JACKFRUIT;
        }
        for (j=0; j<NY; j++) {
            y_type_fudg[j] = JACKFRUIT*2;
        }

        no_anchors = 0;
        for (i=0; i<NX; i++) {
            j = (map+(*map_ctr))->x2y[i];
            if (j < 0 ) continue;
            x_type_fudg[i] = x_type[i];
            y_type_fudg[j] = y_type[j];
            anchor_x[no_anchors] = i;
            anchor_y[no_anchors] = j;
            no_anchors ++;
        }

        if ( opt_quat (x, NX, anchor_x, y, NY, anchor_y, no_anchors, q, &rmsd)) continue;

        retval = monte_carlo (alpha, x, x_type_fudg, NX,
                              y, y_type_fudg, NY, q, &F_current);
        if (retval) {
            status = retval;
            goto cleanup;
        }

        if (options.postprocess) {
            z_scr = stdev ? (F_current - avg)/stdev : 0.0;
        } else {
            z_scr = 0.0;
        }
        quat_to_R (q, R);

        /* store_image() is waste of time, but perhaps not critical */
        store_image (X_rep, Y_rep, R, alpha, map + (*map_ctr));
        map_assigned_score (X_rep, map + (*map_ctr));

        /* store the map that passed all the filters down to here */
        my_map_ctr = *map_ctr;

        map[my_map_ctr].F       = F_current;
        map[my_map_ctr].avg     = avg;
        map[my_map_ctr].avg_sq  = avg_sq;
        map[my_map_ctr].z_score = z_scr;
        memcpy ( map[my_map_ctr].q, q, 4*sizeof(double) );

        /************************/
        /* store sorted         */
        /************************/
        /* the sort key is the negated assigned score */
        store_sorted (map, NX, NY, map_best, map_max,
                      z_best, best_ctr, -map[my_map_ctr].assigned_score,
                      my_map_ctr, &stored_new);

        if ( stored_new ) { /* we want to keep this map */
            (*map_ctr) ++;
            best_ctr++;
        } /* otherwise this map space is reusable */

        /* is this pretty much as good as it can get ? */
        /* NOTE(review): `done` is not tested by this loop, so setting it
           here does not end the search early -- confirm whether a break
           was intended. */
        if ( fabs (map[my_map_ctr].assigned_score - smaller) < options.tol ) done = 1;
    }
    map_best[best_ctr] = -1;

    /******************************************************/
    /* look for the sub-map of a couple of best hits      */
    /******************************************************/
    /* initialization: */
    map_consistence (NX, NY, NULL, NULL, NULL, NULL, NULL);

    /* recount the stored maps from the -1 terminated list */
    best_ctr = 0;
    while ( map_best[best_ctr] > -1 ) {
        best_ctr ++;
    }

    if (best_ctr) {
        int nr_maps = (best_ctr<options.number_maps_cpl) ?
            best_ctr : options.number_maps_cpl;
        int best_i;
        int consistent;
        double z;
        double total_assigned_score, score;
        /* NOTE(review): best_score is shared by all parent maps below, not
           reset per map -- confirm that this is intended. */
        double best_score = -100;
        double gap_score;

        for (i=0; i<nr_maps; i++) { /* look for the complement */
            best_i = map_best[i];

            /* initialize the (list of) submatch map(s) */
            if ( !map[best_i].submatch_best) {
                /* for now look for a single map only */
                /* TODO - would it be worth any to look at more maps? */
                int submatch_max = 1;
                map[best_i].submatch_best = emalloc (submatch_max*sizeof(int) );
                if (! map[best_i].submatch_best) goto cleanup;
            }
            map[best_i].submatch_best[0]    = -1;
            map[best_i].score_with_children =  0;
            map[best_i].compl_z_score       =  0;

            for (j=0; j<best_ctr; j++) {

                if (i==j) continue;

                map_complementarity ( map+best_i, map + map_best[j], &z);

                map_consistence ( NX, NY, map+best_i, map + map_best[j],
                                  &total_assigned_score, &gap_score, NULL);

                /* the pair counts only if it beats each map on its own */
                consistent = ( (map+best_i)->assigned_score < total_assigned_score
                               && (map + map_best[j])->assigned_score
                               < total_assigned_score);
                if ( consistent ) {
                    score = total_assigned_score;
                    if ( score > best_score ) {
                        best_score = score;
                        map[best_i].submatch_best[0]    = map_best[j];
                        map[best_i].score_with_children = total_assigned_score;
                        map[best_i].compl_z_score       = z;
                    }
                }
            }
        }
    }

    /**********************/
    /* garbage collection */
    /* presumably lets gradient_descent() drop its internal storage; only
       done on the success path, as before */
    gradient_descent (1, 0.0, NULL, NULL, 0, NULL, NULL, 0, NULL, NULL);
    status = 0;

 cleanup:
    /* the matrix helpers are not known to accept NULL, hence the guards */
    if (R)             free_dmatrix (R);
    if (x_rotated)     free_dmatrix (x_rotated);
    if (best_quat)     free_dmatrix (best_quat);
    if (best_triple_x) free_imatrix (best_triple_x);
    if (best_triple_y) free_imatrix (best_triple_y);
    free (z_best);
    free (x_type_fudg);
    free (y_type_fudg);
    free (anchor_x);
    free (anchor_y);
    free (penalty_params.custom_gap_penalty_x);
    free (penalty_params.custom_gap_penalty_y);
    /*********************/

    return status;
}