int complement_match (Representation* X_rep, Representation* Y_rep, Map * map, int map_max, int * map_ctr, int * map_best, int best_max, int parent_map){ Penalty_parametrization penalty_params; /* for SW */ double **x = X_rep->full; int * x_type = X_rep->full_type; int NX = X_rep->N_full; double **y = Y_rep->full; int * y_type = Y_rep->full_type; int NY = Y_rep->N_full; double F_effective = 0.0; double F_current; double q[4] = {0.0}, q_init[4] = {0.0}; double **x_rotated = NULL; double **tr_x_rotated = NULL; double **R; double z_scr = 0.0, *z_best; double avg, avg_sq, stdev; double alpha = options.alpha; double rmsd, best_rmsd[TOP_RMSD]; double **best_quat; double cutoff_rmsd = 3.0; /* <<<<<<<<<<<<<<<<< hardcoded */ int *x_type_fudg, *y_type_fudg; int *anchor_x, *anchor_y, no_anchors; int no_top_rmsd = TOP_RMSD, chunk; int x_ctr, y_ctr, top_ctr; int **best_triple_x; int **best_triple_y; int x_triple[3], y_triple[3]; int retval, done = 0; int best_ctr; int i, j; int t; int smaller; int my_map_ctr; int stored_new; int * x2y, map_unstable; //time_t time_now, time_start; int cull_by_dna (Representation * X_rep, int *set_of_directions_x, Representation * Y_rep, int *set_of_directions_y, int set_size, Map *map, double cutoff_rmsd); int distance_of_nearest_approach (Representation * X_rep, int *set_of_directions_x, Representation * Y_rep, int *set_of_directions_y, int set_size, double * rmsd_ptr); int same_hand_triple (Representation * X_rep, int *set_of_directions_x, Representation * Y_rep, int *set_of_directions_y, int set_size); int find_map (Penalty_parametrization * params, Representation *X_rep, Representation *Y_rep, double ** R, double alpha, double * F_effective, Map *map, int *anchor_x, int * anchor_y, int anchor_size ); int find_next_triple (double **X, double **Y, int *x_type, int *y_type, int NX, int NY, int *x_triple, int *y_triple); int gradient_descent (int first_call, double alpha, double **x, int * x_type, int NX, double **y, int * y_type, int NY, double *q_best, double *F_best_ptr) ; int map_quality_metrics (Representation *X_rep, Representation *Y_rep, double ** tr_x_rotated, Map * map, int *reasonable_angle_ct); int monte_carlo (double alpha, double **x, int * x_type, int NX, double **y, int * y_type, int NY, double *q_best, double *F_best_ptr); int opt_quat (double ** x, int NX, int *set_of_directions_x, double ** y, int NY, int *set_of_directions_y, int set_size, double * q, double * rmsd); int qmap (double *x0, double *x1, double *y0, double *y1, double * quat); int store_sorted (Map * map, int NX, int NY, int *map_best, int map_max, double * z_best, int best_ctr, double z_scr, int my_map_ctr, int *stored); map_best[0] = -1; /* it is the end-of-array flag */ if ( *map_ctr >= map_max ) { fprintf (stderr, "Map array undersized.\n"); exit (1); } smaller = (NX <= NY) ? NX : NY; /***********************/ /* memory allocation */ /***********************/ if ( ! (R=dmatrix(3,3) ) ) return 1; /* compiler is bugging me otherwise */ if ( ! (x_rotated = dmatrix (NX,3)) ) return 1; if ( ! (tr_x_rotated = dmatrix (NX,3)) ) return 1; if ( ! (best_quat = dmatrix (no_top_rmsd,4)) ) return 1; if ( ! (best_triple_x = intmatrix (no_top_rmsd,3)) ) return 1; if ( ! (best_triple_y = intmatrix (no_top_rmsd,3)) ) return 1; if ( ! (z_best = emalloc(NX*NY*sizeof(double) )) ) return 1; if ( ! (x_type_fudg = emalloc(NX*sizeof(int) )) ) return 1; if ( ! (y_type_fudg = emalloc(NY*sizeof(int) )) ) return 1; if ( ! (anchor_x = emalloc(NX*sizeof(int) )) ) return 1; if ( ! (anchor_y = emalloc(NY*sizeof(int) )) ) return 1; penalty_params.custom_gap_penalty_x = NULL; penalty_params.custom_gap_penalty_y = NULL; //if ( ! (penalty_params.custom_gap_penalty_x = emalloc(NX*sizeof(double) )) ) return 1; //if ( ! (penalty_params.custom_gap_penalty_y = emalloc(NY*sizeof(double) )) ) return 1; /***********************/ /***********************/ /* expected quantities */ /***********************/ avg = avg_sq = stdev = 0.0; //if (options.postprocess) { if (0) { if (F_moments (x, x_type, NX, y, y_type, NY, alpha, &avg, &avg_sq, &stdev)) return 1; } /***********************/ /***********************/ /* initialization */ /***********************/ best_ctr = 0; penalty_params.gap_opening = options.gap_open; penalty_params.gap_extension = options.gap_extend; penalty_params.endgap = options.endgap; penalty_params.endgap_special_treatment = options.use_endgap; /***********************/ /***************************************/ /* find reasonble triples of SSEs */ /* that correspond in type */ /* and can be mapped onto each other */ /***************************************/ for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) { best_rmsd[top_ctr] = BAD_RMSD+1; best_triple_x[top_ctr][0] = -1; } for (x_ctr=0; x_ctr < NX-2 && !done; x_ctr++) { for (y_ctr=0; y_ctr < NY-2 && !done; y_ctr++) { if ( y_type[y_ctr] != x_type[x_ctr] ) continue; x_triple[0] = x_ctr; y_triple[0] = y_ctr; if (find_next_triple (x, y, x_type, y_type, NX, NY, x_triple, y_triple) ){ continue; } if ( x_triple[1] < 0 || x_triple[2] < 0 ) continue; if ( y_triple[1] < 0 || y_triple[2] < 0 ) continue; /* do these three have kind-of similar layout in space?*/ /* is handedness the same? */ if ( ! same_hand_triple ( X_rep, x_triple, Y_rep, y_triple, 3)) continue; /* are distances comparab;e? */ if (distance_of_nearest_approach ( X_rep, x_triple, Y_rep, y_triple, 3, &rmsd)) continue; if ( rmsd > cutoff_rmsd) continue; /* find q_init that maps the two triples as well as possible*/ if ( opt_quat ( x, NX, x_triple, y, NY, y_triple, 3, q_init, &rmsd)) continue; for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) { if ( rmsd <= best_rmsd[top_ctr] ) { chunk = no_top_rmsd - top_ctr -1; if (chunk) { memmove (best_rmsd+top_ctr+1, best_rmsd+top_ctr, chunk*sizeof(double)); memmove (best_quat[top_ctr+1], best_quat[top_ctr], chunk*4*sizeof(double)); memmove (best_triple_x[top_ctr+1], best_triple_x[top_ctr], chunk*3*sizeof(int)); memmove (best_triple_y[top_ctr+1], best_triple_y[top_ctr], chunk*3*sizeof(int)); } best_rmsd[top_ctr] = rmsd; memcpy (best_quat[top_ctr], q_init, 4*sizeof(double)); memcpy (best_triple_x[top_ctr], x_triple, 3*sizeof(int)); memcpy (best_triple_y[top_ctr], y_triple, 3*sizeof(int)); break; } } } } # if 0 for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) { if ( best_rmsd[top_ctr] > BAD_RMSD ) break; printf (" %3d %8.3lf ", top_ctr, best_rmsd[top_ctr]); vec_out ( best_quat[top_ctr], 4, "quat: "); for (t=0; t<3; t++ ) { printf ("\t %3d %3d \n", best_triple_x[top_ctr][t]+1, best_triple_y[top_ctr][t]+1 ); } } exit (1); # endif /*********************************************/ /* main loop */ /*********************************************/ for (top_ctr=0; top_ctr<no_top_rmsd; top_ctr++) { if ( best_rmsd[top_ctr] > BAD_RMSD ) break; quat_to_R (best_quat[top_ctr], R); rotate (x_rotated, NX, R, x); F_current = F( y, y_type, NY, x_rotated, x_type, NX, alpha); # if 0 printf ("\n***********************************\n"); printf (" %3d %8.3lf %8.3lf ", top_ctr, best_rmsd[top_ctr], F_current); vec_out ( best_quat[top_ctr], 4, "quat: "); for (t=0; t<3; t++ ) { printf ("\t %3d %3d \n", best_triple_x[top_ctr][t]+1, best_triple_y[top_ctr][t]+1 ); } # endif /* find map which uses the 2 triples as anchors */ no_anchors = 3; find_map (&penalty_params, X_rep, Y_rep, R, alpha, &F_effective, map + (*map_ctr), best_triple_x[top_ctr], best_triple_y[top_ctr], no_anchors); x2y = ( map + (*map_ctr) ) ->x2y; map_unstable = 0; for (t=0; t<3; t++ ) { if ( x2y[best_triple_x[top_ctr][t]] != best_triple_y[top_ctr][t] ) { map_unstable = 1; } } if ( map_unstable) continue; /* dna here is not DNA but "distance of nearest approach" */ cull_by_dna ( X_rep, best_triple_x[top_ctr], Y_rep, best_triple_y[top_ctr], 3, map + (*map_ctr), cutoff_rmsd ); //printf ("map after culling by dna:\n"); //print_map (stdout, map+ (*map_ctr), NULL, NULL, NULL, NULL, 1); /* monte that optimizes the aligned vectors only */ for (i=0; i<NX; i++) { x_type_fudg[i] = JACKFRUIT; } for (j=0; j<NY; j++) { y_type_fudg[j] = JACKFRUIT*2; } no_anchors = 0; for (i=0; i<NX; i++) { j = (map+(*map_ctr))->x2y[i]; if (j < 0 ) continue; x_type_fudg[i] = x_type[i]; y_type_fudg[j] = y_type[j]; anchor_x[no_anchors] = i; anchor_y[no_anchors] = j; no_anchors ++; } if ( opt_quat ( x, NX, anchor_x, y, NY, anchor_y, no_anchors, q, &rmsd)) continue; retval = monte_carlo ( alpha, x, x_type_fudg, NX, y, y_type_fudg, NY, q, &F_current); if (retval) return retval; if (options.postprocess) { z_scr = stdev ? (F_current - avg)/stdev : 0.0; } else { z_scr = 0.0; } quat_to_R (q, R); /* store_image() is waste of time, but perhaps not critical */ store_image (X_rep, Y_rep, R, alpha, map + (*map_ctr)); map_assigned_score ( X_rep, map + (*map_ctr)); //printf ("map %2d assigned score: %8.3lf z_score: %8.3lf \n\n", // *map_ctr+1, (map + (*map_ctr)) -> assigned_score, z_scr); /* store the map that passed all the filters down to here*/ my_map_ctr = *map_ctr; map[my_map_ctr].F = F_current; map[my_map_ctr].avg = avg; map[my_map_ctr].avg_sq = avg_sq; map[my_map_ctr].z_score = z_scr; memcpy ( map[my_map_ctr].q, q, 4*sizeof(double) ); /* recalculate the assigned score*/ //if (top_ctr==24) exit (1); /************************/ /* store sorted */ /************************/ /* find the place for the new z-score */ store_sorted (map, NX, NY, map_best, map_max, z_best, best_ctr, -map[my_map_ctr].assigned_score, my_map_ctr, &stored_new); if ( stored_new ) { /* we want to keep this map */ (*map_ctr) ++; best_ctr++; } /* otherwise this map space is reusable */ /* is this pretty much as good as it can get ? */ if ( fabs (map[my_map_ctr].assigned_score - smaller) < options.tol ) done = 1; } map_best[best_ctr] = -1; /******************************************************/ /* look for the sub-map of a couple of best hits */ /******************************************************/ /* initialization:*/ map_consistence ( NX, NY, NULL, NULL, NULL, NULL, NULL); best_ctr = 0; while ( map_best[best_ctr] > -1 ) { best_ctr ++; } //exit (1); if (best_ctr) { int nr_maps = (best_ctr<options.number_maps_cpl)? best_ctr : options.number_maps_cpl; int best_i; int consistent; double z; double total_assigned_score, score, best_score = -100; double gap_score; for (i=0; i<nr_maps; i++) { /* look for the complement */ best_i = map_best[i]; /*intialize the (list of) submatch map(s) */ if ( !map[best_i].submatch_best) { /* for now look for a single map only */ /* TODO - would it be worth any to look at more maps?*/ int map_max = 1; map[best_i].submatch_best = emalloc (map_max*sizeof(int) ); if (! map[best_i].submatch_best) return 1; } map[best_i].submatch_best[0] = -1; map[best_i].score_with_children = 0; map[best_i].compl_z_score = 0; for (j=0; j<best_ctr; j++) { if (i==j) continue; map_complementarity ( map+best_i, map + map_best[j], &z); map_consistence ( NX, NY, map+best_i, map + map_best[j], &total_assigned_score, &gap_score, NULL); consistent = ( (map+best_i)->assigned_score < total_assigned_score && (map + map_best[j])->assigned_score < total_assigned_score); if ( consistent ) { score = total_assigned_score; if ( score > best_score ) { best_score = score; map[best_i].submatch_best[0] = map_best[j]; map[best_i].score_with_children = total_assigned_score; map[best_i].compl_z_score = z; } } } } } /**********************/ /* garbage collection */ gradient_descent (1, 0.0, NULL, NULL, 0, NULL, NULL, 0, NULL, NULL); free_dmatrix (R); free_dmatrix (x_rotated); free_dmatrix (tr_x_rotated); free_dmatrix (best_quat); free_imatrix (best_triple_x); free_imatrix (best_triple_y); free (z_best); free (x_type_fudg); free (y_type_fudg); free (anchor_x); free (anchor_y); if (penalty_params.custom_gap_penalty_x) free (penalty_params.custom_gap_penalty_x); if (penalty_params.custom_gap_penalty_y) free (penalty_params.custom_gap_penalty_y); /*********************/ return 0; }
int find_map ( Penalty_parametrization * penalty_params, Representation *X_rep, Representation *Y_rep, double ** R, double alpha, double * F_effective, Map *map, int *anchor_x, int *anchor_y, int anchor_size) { int i, j, map_size, a; int NX=X_rep->N_full, NY=Y_rep->N_full; double aln_score; /* "sse_pair_score" is the matrix of values (exp weight)*(-1 for SSE mismatch) */ store_sse_pair_score (X_rep, Y_rep, R, alpha, map); /* if the anchors are given, somebody insists they should be considered as already aligned */ if (penalty_params->custom_gap_penalty_x) memset (penalty_params->custom_gap_penalty_x, 0, NX*sizeof(double) ); if (penalty_params->custom_gap_penalty_y) memset (penalty_params->custom_gap_penalty_y, 0, NY*sizeof(double) ); if (anchor_x && anchor_y) { for (a=0; a<anchor_size; a++){ i = anchor_x[a]; if (penalty_params->custom_gap_penalty_x) penalty_params->custom_gap_penalty_x[i] = options.far_far_away; for (j=0; j<NY; j++) { if ( j == anchor_y[a] ) continue; map->sse_pair_score[i][j] = options.far_far_away; map->cosine[i][j] = -0.9999; } j = anchor_y[a]; if (penalty_params->custom_gap_penalty_y) penalty_params->custom_gap_penalty_y[j] = options.far_far_away; for (i=0; i<NX; i++) { if ( i == anchor_x[a] ) continue; map->sse_pair_score [i][j] = options.far_far_away; map->cosine[i][j] = -0.9999; } } } /* dynamic programming using the sse_pair_score */ if (options.current_algorithm == SEQUENTIAL) { double **similarity_score = map->sse_pair_score; smith_waterman (penalty_params, NX, NY, similarity_score, map->x2y, map->y2x, &aln_score); } else { // shouldn't we have checked for this before? fprintf (stderr, "%s:%d: Unrecognized algorithm type.\n",__FILE__, __LINE__); exit (1); } //exit(1); map_assigned_score (X_rep, map); map -> avg_length_mismatch = 0; map_size = 0; for (i=0; i<NX; i++) { j = map->x2y[i]; if ( j<0) continue; if ( map->cosine[i][j] < options.far_away_cosine) { map->x2y[i] = options.far_far_away; map->y2x[j] = options.far_far_away; /*everything crashes; investigate later */ } else { map_size++; map->avg_length_mismatch += fabs(X_rep->length[i] - Y_rep->length[j]); } } map->avg_length_mismatch /= map_size; *F_effective = -map_size; return 0; }