Beispiel #1
0
int main ( int argc, char * argv[]) {

    Options options;
    Protein protein;
    Alignment * alignment = NULL;
    int retval;
    int almtctr1, almtctr2;
    double **score = NULL, corr,  pctg_gaps;
    double **clustering_score = NULL;
    double *area, *distance;
    int **rank_order= NULL,**res_rank=NULL,**int_cvg=NULL ;
    int ** correlated = NULL, **almt2prot = NULL, **prot2almt = NULL;
    /* command file is required */
    if ( argc < 2 ) {
	fprintf ( stderr, "Usage: %s <command file>.\n", argv[0]);
	exit (0);
    }
    retval = read_cmd_file ( argv[1], &options);
    if (retval) exit(retval);
    retval = logger (&options, INTRO, "");
    if (retval) exit(retval);
   

    /*******************************************/
    /*                                         */
    /*  PDB input                              */
    /*                                         */
    /*******************************************/
    if ( ! options.pdbname[0]) {
	fprintf (stderr, "%s cannot work without structure (cmd file was %s).\n",
		 argv[0], argv[1]);
	exit (1);
	
    } else {

	/* warn if no chain given */
	if ( !options.chain) {
	    retval = logger (&options, WARN, "No chain specified. Using the first one.");
	    if ( retval) exit (1);
	}
	if (retval) exit(retval);
	/* read in the structure */
	retval = read_pdb (options.pdbname, &protein, options.chain);
	if (retval) exit(retval);

   }

    
    /*******************************************/
    /*                                         */
    /*  alignment scoring                      */
    /*                                         */
    /*******************************************/
    if ( ! ( alignment = emalloc ( options.no_of_alignments*sizeof(Alignment)) )) return 1;
    if ( ! ( score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1;
    if ( ! ( rank_order = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( clustering_score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1;
    if ( ! ( res_rank = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( int_cvg = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( almt2prot = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( prot2almt = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( area = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1;
    if ( ! ( distance = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1;

    printf ( "\t%8s   %20s  %8s  %8s  %8s  \n", "almt#", "name        ",  "<dist to qry>", "%gaps", "area");
    
    for ( almtctr1 = 0; almtctr1 < options.no_of_alignments; almtctr1++) {

	/* read in the alignment */
	retval = read_clustalw (options.almtname[almtctr1], alignment + almtctr1);
	if (retval) exit(retval);
	/* pairwise distances btw the seqs */
	retval   = seq_pw_dist (alignment+almtctr1);
	if ( retval) return retval;
	/* average dist to the query in this alignment: */ 
	distance[almtctr1] = avg_dist_to_special (&options, alignment + almtctr1);
	/* percentage of gaps in the alignment: */
	pctg_gaps = (double) alignment->total_gaps/ ( (alignment+almtctr1)->length*(alignment+almtctr1)->number_of_seqs);
	/* make the residue scoring array */
	score[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(double));
	/* fill in the score array */ 
	scoring (&options,  alignment+almtctr1, score[almtctr1]);
	
	/* translate the scoring into rank order */
	rank_order[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(int));
	score2rank (score[almtctr1], rank_order[almtctr1], alignment[almtctr1].length);
	
	/* mapping between the protein and the alignment almtctr1 */
	if ( ! (almt2prot[almtctr1] = (int *) emalloc (alignment[almtctr1].length*sizeof(int))) )exit (1);
	if ( ! (prot2almt[almtctr1] = (int *) emalloc (protein.length*sizeof(int))) )exit (1);
	retval    = struct_almt_mapping (&protein, alignment+almtctr1, options.query,  prot2almt[almtctr1], almt2prot[almtctr1]);
	if (retval) exit(retval);
	
	/* find coverage info implied by the scoring array */
	if ( ! (res_rank[almtctr1] = (int*) emalloc (protein.length*sizeof(int))) ) exit (1);
	if ( ! (int_cvg[almtctr1] =  (int*) emalloc (protein.length*sizeof(int))) ) exit (1);
	coverage ( &protein, almt2prot[almtctr1], score[almtctr1], alignment[almtctr1].length,
		   res_rank[almtctr1], int_cvg[almtctr1] );
	/*clustering score*/
	clustering_score[almtctr1]  =  (double*) emalloc (protein.length*sizeof(double));
	if (!clustering_score[almtctr1]) exit(retval);
	clustering ( &protein,  res_rank[almtctr1], int_cvg[almtctr1], clustering_score[almtctr1]);
	/* cumulative clustering score*/
	area[almtctr1]  = area_over_coverage (int_cvg[almtctr1], clustering_score[almtctr1], protein.length);
					     
	printf ( "\t   %4d   %20s   %8.3lf     %8.3lf  %8.3lf \n",
		 almtctr1, options.almtname[almtctr1], distance[almtctr1], pctg_gaps, area[almtctr1]);
    }

    /* find the table of correlations */
    if ( ! (correlated = intmatrix ( options.no_of_alignments, options.no_of_alignments) ) ) return 1;
    for ( almtctr1 = 0; almtctr1 < options.no_of_alignments -1; almtctr1++) {
	correlated[almtctr1][almtctr1] = 1;
	for ( almtctr2 = almtctr1+1; almtctr2 < options.no_of_alignments; almtctr2++) {
	    if ( alignment[almtctr1].length != alignment[almtctr2].length  ) {
		fprintf ( stderr, "Error alignments in the files %s and %s ",
			  options.almtname[almtctr1], options.almtname[almtctr2]);
		fprintf ( stderr, "seem to be of unequal length: %d and %d.\n",
			  alignment[almtctr1].length ,  alignment[almtctr2].length);
		return 1;
	    }
	    corr = spearman ( rank_order[almtctr1], rank_order[almtctr2], alignment[almtctr1].length );
	    printf ( " %3d  %3d  %8.4lf\n", almtctr1, almtctr2, corr);
	    correlated[almtctr1][almtctr2] = ( corr > 0.9 );
	}
    }

    
    /* find corelated clusters (of sequence selections)*/
    {
	int  *cluster_count_per_size;
	int  no_of_clusters;
	int  max_size, secnd_max_size , ** cluster;
	int size = options.no_of_alignments;
	int i,j;
	double dist, ar, max_area, dist_at_max_area;
	double min_dist_at_max_area, min_dist, max_area_at_min_dist;
	int almt_no, min_dist_almt;
	int cluster_counter (int  no_of_things,  int *neighbors[],
			      int cluster_count_per_size[], int * no_of_clusters,
			      int * max_size, int * secnd_max_size , int * cluster[]);
	
	
	if ( ! ( cluster_count_per_size = emalloc (size*sizeof(int)))) return 1; 
	if ( ! (cluster = intmatrix ( size+1, size+1) ) ) return 1;
	retval = cluster_counter (size,  correlated,  cluster_count_per_size,  &no_of_clusters,
			 & max_size,  &secnd_max_size , cluster);
	if ( retval ) return 1;

	printf ( "number of clusters: %d \n", no_of_clusters);
	for (i=0; i<=size; i++ ) {
	    if ( ! cluster[i][0] ) continue;
	    if ( !i ) {
		printf ( "\t isolated:\n");
	    } else {
		printf ("\t cluster size: %3d \n", cluster[i][0]); 
	    }
	    for (j=1; j <= cluster[i][0]; j++ ) {
		printf ( "%3d ", cluster[i][j] );
	    }
	    printf ( "\n");
	}

	
	/* which cluster is the closest to the singled out sequence ("special") */
	min_dist_at_max_area = dist_at_max_area = 10;
	max_area_at_min_dist = min_dist = -10;
	min_dist_almt = -1;
	for (i=0; i<=size; i++ ) {
	    if ( ! cluster[i][0] ) continue;
	    
	    max_area = -100;
	    almt_no =  dist_at_max_area = -1;
	    
	    for (j=1; j <= cluster[i][0]; j++ ) {
		dist = distance[cluster[i][j]] ;
		ar =  area[cluster[i][j]] ;
		if ( max_area < ar ) {
		    max_area = ar;
		    dist_at_max_area = dist;
		    almt_no = cluster[i][j];
		}
	    }
	    if ( almt_no < 0 ) {
		fprintf ( stderr, "Error selecting the alignment (1)\n");
		exit (1);
	    }
	    
	    if ( min_dist_at_max_area > dist_at_max_area ) {
		min_dist = dist_at_max_area;
		max_area_at_min_dist = max_area;
		min_dist_almt = almt_no;
	    }
	}
	if ( min_dist_almt < 0 ) {
	    fprintf ( stderr, "Error selecting the alignment (2)\n");
	    exit (1);
	}
	
	printf ( "choosing alignment %d %s (distance: %5.3f  area: %6.3f)\n",
		min_dist_almt, options.almtname[min_dist_almt],  min_dist, max_area_at_min_dist);
	
	
	free (cluster_count_per_size);
	free_matrix ( (void **) cluster);
    }
    free (score);

    logger ( &options, NOTE, "");
    return 0;
}
Beispiel #2
0
/*
 * Test driver entry point.
 *
 * Initializes MPI and Zoltan, reads the command file on processor 0
 * ("zdrive.inp" unless a file name is given as the single argument),
 * broadcasts the settings, then for Number_Iterations iterations reads the
 * mesh (first iteration only), optionally perturbs it, and calls
 * run_zoltan().  Results are written after the loop when print_output is
 * still set.
 *
 * Returns 0 once MPI has been finalized, including after the errors
 * reported through Gen_Error(); a malformed command line terminates the
 * process with exit(1).
 */
int main(int argc, char *argv[])
{
/* Local declarations. */
  struct Zoltan_Struct *zz = NULL;

  char  *cmd_file;
  char   cmesg[256]; /* for error messages */

  float  version;

  int    Proc, Num_Proc;
  int    iteration;
  int    error, gerror;
  int    print_output = 1;

  MESH_INFO  mesh;             /* mesh information struct */
  PARIO_INFO pio_info;
  PROB_INFO  prob;

  /* Number of entries broadcast from CITESEER; also used to bound the read. */
  /* assumes CITESEER holds at least this many ints -- TODO confirm */
  enum { CITESEER_LEN = 200 };

/***************************** BEGIN EXECUTION ******************************/

  /* initialize MPI */
  MPI_Init(&argc, &argv);

#ifdef VAMPIR
  VT_initialize(&argc, &argv);
#endif

  /* get some machine information */
  MPI_Comm_rank(MPI_COMM_WORLD, &Proc);
  MPI_Comm_size(MPI_COMM_WORLD, &Num_Proc);

  my_rank = Proc;

#ifdef HOST_LINUX
  signal(SIGSEGV, meminfo_signal_handler);
  signal(SIGINT, meminfo_signal_handler);
  signal(SIGTERM, meminfo_signal_handler);
  signal(SIGABRT, meminfo_signal_handler);
  signal(SIGFPE, meminfo_signal_handler);
#endif

#ifdef ZOLTAN_PURIFY
  printf("%d of %d ZDRIVE LAUNCH pid = %d file = %s\n", 
         Proc, Num_Proc, getpid(), argv[1]);
#endif

  /* Initialize flags */
  Test.DDirectory = 0;
  Test.Local_Parts = 0;
  Test.Fixed_Objects = 0;
  Test.Drops = 0;
  Test.RCB_Box = 0;
  Test.Multi_Callbacks = 0;
  Test.Graph_Callbacks = 1;
  Test.Hypergraph_Callbacks = 1;
  Test.Gen_Files = 0;
  Test.Null_Lists = NO_NULL_LISTS;
  Test.Dynamic_Weights = .0;
  Test.Dynamic_Graph = .0;
  Test.Vtx_Inc = 0;

  Output.Text = 1;
  Output.Gnuplot = 0;
  Output.Nemesis = 0;
  Output.Plot_Partition = 0;
  Output.Mesh_Info_File = 0;

  /* Interpret the command line */
  switch(argc)
  {
  case 1:
    cmd_file = "zdrive.inp";
    break;

  case 2:
    cmd_file = argv[1];
    break;

  default:
    fprintf(stderr, "MAIN: ERROR in command line,");
    if(Proc == 0)
    {
      fprintf(stderr, " usage:\n");
      fprintf(stderr, "\t%s [command file]", DRIVER_NAME);
    }
    exit(1);
    break;
  }

  /* initialize Zoltan */
  if ((error = Zoltan_Initialize(argc, argv, &version)) != ZOLTAN_OK) {
    sprintf(cmesg, "fatal: Zoltan_Initialize returned error code, %d", error);
    Gen_Error(0, cmesg);
    error_report(Proc);
    /* mesh, pio_info and prob are not initialized yet, so the cleanup at
     * End (which reads mesh.dd and prob.params) must be skipped. */
    goto Shutdown;
  }

  /* initialize some variables */
  initialize_mesh(&mesh, Proc);

  pio_info.dsk_list_cnt		= -1;
  pio_info.file_comp            = STANDARD;
  pio_info.num_dsk_ctrlrs	= -1;
  pio_info.pdsk_add_fact	= -1;
  pio_info.zeros		= -1;
  pio_info.file_type		= -1;
  pio_info.chunk_reader         = 0;
  pio_info.init_dist_type	= -1;
  pio_info.init_size		= ZOLTAN_ID_INVALID;
  pio_info.init_dim 		= -1;
  pio_info.init_vwgt_dim 	= -1;
  pio_info.init_dist_pins       = -1;
  pio_info.pdsk_root[0]		= '\0';
  pio_info.pdsk_subdir[0]	= '\0';
  pio_info.pexo_fname[0]	= '\0';

  prob.method[0]		= '\0';
  prob.num_params		= 0;
  prob.params			= NULL;

  /* Read in the ascii input file */
  error = gerror = 0;
  if (Proc == 0) {
    printf("\n\nReading the command file, %s\n", cmd_file);
    if (!read_cmd_file(cmd_file, &prob, &pio_info, NULL)) {
      /* cmd_file comes from the command line and may be arbitrarily long;
       * bound the write to the message buffer. */
      snprintf(cmesg, sizeof(cmesg),
               "fatal: Could not read in the command file"
               " \"%s\"!\n", cmd_file);
      Gen_Error(0, cmesg);
      error_report(Proc);
      print_output = 0;
      error = 1;
    }

    if (!check_inp(&prob, &pio_info)) {
      Gen_Error(0, "fatal: Error in user specified parameters.\n");
      error_report(Proc);
      print_output = 0;
      error = 1;
    }

    print_input_info(stdout, Num_Proc, &prob, &pio_info, version);
  }

  /* Share processor 0's verdict so that every processor stops together. */
  MPI_Allreduce(&error, &gerror, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD);
  if (gerror) {
    /* The other processors never received the command info; make sure
     * none of them tries to write results. */
    print_output = 0;
    goto End;
  }

  /* broadcast the command info to all of the processor */
  brdcst_cmd_info(Proc, &prob, &pio_info, &mesh);

  Zoltan_Set_Param(NULL, "DEBUG_MEMORY", "1");
  print_output = Output.Text;

  /*
   *  Create a Zoltan structure.
   */
  if ((zz = Zoltan_Create(MPI_COMM_WORLD)) == NULL) {
    Gen_Error(0, "fatal:  NULL returned from Zoltan_Create()\n");
    error_report(Proc);
    print_output = 0;
    /* Leave through the common exit path so that the mesh is released and
     * MPI is finalized. */
    goto End;
  }

  if (!setup_zoltan(zz, Proc, &prob, &mesh, &pio_info)) {
    Gen_Error(0, "fatal: Error returned from setup_zoltan\n");
    error_report(Proc);
    print_output = 0;
    goto End;
  }

  /* srand(Proc); Different seeds on different procs. */
  srand(1);  /* Same seed everywhere. */

  if (Test.Dynamic_Weights){
    /* Set obj weight dim to 1; can be overridden by user parameter */
    Zoltan_Set_Param(zz, "OBJ_WEIGHT_DIM", "1");
  }

  /* Loop over read and balance for a number of iterations */
  /* (Useful for testing REUSE parameters in Zoltan.) */
  for (iteration = 1; iteration <= Number_Iterations; iteration++) {

    if (Proc == 0) {
      printf("Starting iteration %d\n", iteration); 
      fflush(stdout);
    }

    /*
     * now read in the mesh and element information.
     * This is the only function call to do this. Upon return,
     * the mesh struct and the elements array should be filled.
     */
    if (iteration == 1) {
      if (!read_mesh(Proc, Num_Proc, &prob, &pio_info, &mesh)) {
        Gen_Error(0, "fatal: Error returned from read_mesh\n");
        error_report(Proc);
        print_output = 0;
        goto End;
      }
      /*
       *  Create a Zoltan DD for tracking elements during repartitioning.
       */

      if (mesh.data_type == ZOLTAN_HYPERGRAPH && !build_elem_dd(&mesh)) {
        Gen_Error(0, "fatal: Error returned from build_elem_dd\n");
        error_report(Proc);
        print_output = 0;
        goto End;
      }
    }


#ifdef KDDKDD_COOL_TEST
/* KDD Cool test of changing number of partitions  */
    sprintf(cmesg, "%d", Num_Proc * iteration);
    Zoltan_Set_Param(zz, "NUM_GLOBAL_PARTS", cmesg);
#endif

    /*
     * Produce files to verify input.
     */
    if (iteration == 1) {
      if (Debug_Driver > 2) {
        if (!output_results(cmd_file,"in",Proc,Num_Proc,&prob,&pio_info,&mesh)){
          Gen_Error(0, "fatal: Error returned from output_results\n");
          error_report(Proc);
        }
        if (Output.Gnuplot)
          if (!output_gnu(cmd_file,"in",Proc,Num_Proc,&prob,&pio_info,&mesh)) {
            Gen_Error(0, "warning: Error returned from output_gnu\n");
            error_report(Proc);
          }
      }
      if (Test.Vtx_Inc<0){
        /* Read Citeseer data from file */
        FILE *fp;
        int i=0;
        if (Proc==0){
          fp = fopen("months.txt", "r");
          if (!fp) {
            printf("ERROR: Couldn't open file months.txt\n");
          }
          else {
            /* Stop at the number of entries that is broadcast below so a
             * long file cannot write past that range. */
            while (i < CITESEER_LEN && fscanf(fp, "%d", &CITESEER[i])==1){
              ++i;
            }
            fclose(fp);
          }
        }
        /* Collective call: every processor takes part even when the file
         * could not be opened on processor 0. */
        MPI_Bcast (CITESEER, CITESEER_LEN, MPI_INT, 0, MPI_COMM_WORLD);
      }
    }

    if (Test.Dynamic_Graph > 0.0){
      if (mesh.data_type == ZOLTAN_GRAPH) {
        remove_random_vertices(&mesh, iteration, Test.Dynamic_Graph); 
      }
      else{
        Gen_Error(0, "fatal: \"test dynamic graph\" only works on graphs, not hypergraphs\n");
        error_report(Proc);
        print_output = 0;
        goto End;
      }
    }

    if (Test.Vtx_Inc){
      if (mesh.data_type == ZOLTAN_HYPERGRAPH ) {
        if (Test.Vtx_Inc>0)
          mesh.visible_nvtx += Test.Vtx_Inc; /* Increment uniformly */
        else
          mesh.visible_nvtx = CITESEER[iteration-1]; /* Citeseer document matrix. */
      }
      else{
        Gen_Error(0, "fatal: \"vertex increment\" only works on hypergraphs\n");
        error_report(Proc);
        print_output = 0;
        goto End;
      }
    }

    /*
     * now run Zoltan to get a new load balance and perform
     * the migration
     */
  
#ifdef IGNORE_FIRST_ITERATION_STATS
if (iteration == 1) {
  /* Exercise partitioner once on Tbird because first run is slow. */
  /* Lee Ann suspects Tbird is loading shared libraries. */
  struct Zoltan_Struct *zzcopy;
  zzcopy = Zoltan_Copy(zz);
  /* Don't do any migration or accumulate any stats. */
  if (Proc == 0) printf("%d KDDKDD IGNORING FIRST ITERATION STATS\n", Proc);
  Zoltan_Set_Param(zzcopy, "RETURN_LISTS", "NONE");
  Zoltan_Set_Param(zzcopy, "FINAL_OUTPUT", "0");
  Zoltan_Set_Param(zzcopy, "USE_TIMERS", "0");
  if (!run_zoltan(zzcopy, Proc, &prob, &mesh, &pio_info)) {
    Gen_Error(0, "fatal: Error returned from run_zoltan\n");
    error_report(Proc);
    print_output = 0;
    goto End;
  }
  Zoltan_Destroy(&zzcopy);
}
#endif /* IGNORE_FIRST_ITERATION_STATS */
#ifdef RANDOM_DIST
 if (iteration % 2 == 0) {
   char LB_METHOD[1024];

  if (Proc == 0) printf("%d CCCC Randomizing the input\n", Proc);
   strcpy(LB_METHOD, prob.method);
   strcpy(prob.method, "RANDOM");
   Zoltan_Set_Param(zz, "LB_METHOD", "RANDOM");
   Zoltan_Set_Param(zz, "RETURN_LISTS", "ALL");
    if (!run_zoltan(zz, Proc, &prob, &mesh, &pio_info)) {
      Gen_Error(0, "fatal: Error returned from run_zoltan\n");
      error_report(Proc);
      print_output = 0;
      goto End;
    }
   Zoltan_Set_Param(zz, "RETURN_LISTS", "NONE");
   Zoltan_Set_Param(zz, "LB_METHOD", LB_METHOD);
   strcpy(prob.method, LB_METHOD);
  if (Proc == 0) printf("%d CCCC Randomizing the input -- END\n", Proc);
 }
#endif /* RANDOM_DIST */
    if (!run_zoltan(zz, Proc, &prob, &mesh, &pio_info)) {
      Gen_Error(0, "fatal: Error returned from run_zoltan\n");
      error_report(Proc);
      print_output = 0;
      goto End;
    }

    /* Reset the mesh data structure for next iteration. */
    if (iteration < Number_Iterations) {
      int i, j;
      float tmp;
      float twiddle = 0.01;
      char str[32];  /* large enough for any int; 4 bytes overflowed at 1000 */
      /* Perturb coordinates of mesh */
      if (mesh.data_type == ZOLTAN_GRAPH){
        for (i = 0; i < mesh.num_elems; i++) {
          for (j = 0; j < mesh.num_dims; j++) {
            /* tmp = ((float) rand())/RAND_MAX; *//* Equiv. to sjplimp's test */
            tmp = (float) (i % 10) / 10.;
            mesh.elements[i].coord[0][j] += twiddle * (2.0*tmp-1.0);
            mesh.elements[i].avg_coord[j] = mesh.elements[i].coord[0][j];
          }
        }
        /* Increase weights in some parts */
        if (Test.Dynamic_Weights){
          /* Randomly pick 10% of parts to "refine" */
          /* Note:  Assumes at least 10 parts!  */
          /* Increase vertex weight, and also edge weights? TODO */
          j = (int) ((10.0*rand())/RAND_MAX + .5);
          for (i = 0; i < mesh.num_elems; i++) {
            if ((mesh.elements[i].my_part%10) == j){
                mesh.elements[i].cpu_wgt[0] = Test.Dynamic_Weights*(1+rand()%5);
            }
          }
        }
      }
      /* change the ParMETIS Seed */
      snprintf(str, sizeof(str), "%d", iteration);
#ifdef ZOLTAN_PARMETIS      
      Zoltan_Set_Param(zz, "PARMETIS_SEED", str);
#endif
    }

  } /* End of loop over read and balance */

  if (Proc == 0) {
    printf("FILE %s:  Total:    %e seconds in Partitioning\n", 
           cmd_file, Total_Partition_Time);
    printf("FILE %s:  Average:  %e seconds per Iteration\n", 
           cmd_file, Total_Partition_Time/Number_Iterations);
  }

  /* Common exit path; only reached after mesh and prob were initialized. */
End:
  Zoltan_Destroy(&zz);
  if (mesh.dd) Zoltan_DD_Destroy(&(mesh.dd));

  Zoltan_Memory_Stats();

  /*
   * output the results
   */
  if (print_output) {
    if (!output_results(cmd_file,"out",Proc,Num_Proc,&prob,&pio_info,&mesh)) {
      Gen_Error(0, "fatal: Error returned from output_results\n");
      error_report(Proc);
    }

    if (Output.Gnuplot) {
      if (!output_gnu(cmd_file,"out",Proc,Num_Proc,&prob,&pio_info,&mesh)) {
        Gen_Error(0, "warning: Error returned from output_gnu\n");
        error_report(Proc);
      }
    }
  }

  free_mesh_arrays(&mesh);
  free(prob.params);

  /* Exit path for failures that happen before any driver state exists. */
Shutdown:
  MPI_Finalize();
  
#ifdef VAMPIR
  VT_finalize();
#endif

  return 0;
}
Beispiel #3
0
/*
 * Driver: compares every descriptor in the query file (argv[2]) against every
 * descriptor in the database/target file (argv[1]) and writes one score line
 * per compared pair to a digest file.  An optional parameter file (argv[3])
 * overrides the default options.
 *
 * For each query the targets are first read into an array, then compared
 * inside an OpenMP parallel loop.  When no output name was configured, a
 * separate digest file named after the query is written for each query;
 * otherwise all results go to the one configured file.
 *
 * Returns 0 on success; errors end the program with a non-zero status via
 * return or exit().
 */
int main(int argc, char * argv[]) {

    Descr qry_descr = {
        {0}
    };
    Descr tgt_descr = {
        {0}
    };
    clock_t CPU_time_begin, CPU_time_end;
    int retval, qry_done, tgt_done;
    int db_ctr, db_effective_ctr;
    int user_defined_name;
    FILE * qry_fptr = NULL, * tgt_fptr = NULL, * digest = NULL;

    int compare(Descr *descr1, Descr *descr2, Score * score, Score * score_hung);
    int read_cmd_file(char *filename);

    if (argc < 3) {
        fprintf(stderr,
                "Usage: %s <db file> <qry file> [<parameter file>].\n",
                argv[0]);
        exit(1);
    }
    if (!(qry_fptr = efopen(argv[2], "r"))) return 1;
    if (!(tgt_fptr = efopen(argv[1], "r"))) return 1;

    /* set defaults: */
    set_default_options();

    /* change them with the cmd file, if the cmd file given */
    if (argc == 4) {
        if (read_cmd_file(argv[3])) return 1;
    }

    /* read in the table of integral values */
    /* the array int_table in struct_table.c */
    if (read_integral_table(options.path)) {
        fprintf(stderr, "In data file  %s.\n\n", options.path);
        exit(1);
    }
    set_up_exp_table();

    /* non-zero when an output name was already set in the options */
    user_defined_name = options.outname[0];


    /*********************************/
    /* loop over the query database :*/
    qry_done = 0;
    retval = -1;
    db_effective_ctr = 0;
    CPU_time_begin = clock();

    while (!qry_done) {
        /* get_next_descr: 1 is treated as "skip this entry", -1 as end of input */
        retval = get_next_descr(qry_fptr, &qry_descr);
        if (retval == 1) {
            continue;
        } else if (retval == -1) {
            qry_done = 1;
            continue;
        }

        /* digest file for larger scale comparisons; when one is already
           open we keep writing to it */
        if (!digest) {
            if (!user_defined_name) {
                sprintf(options.outname, "%s.struct_out",
                        qry_descr.name);
            }

            /* output name in postprocessing consists of query and target name */
            retval = get_next_descr(tgt_fptr, &tgt_descr);
            if (options.postprocess) {
                sprintf(options.outname, "%s_%s.struct_out", qry_descr.name, tgt_descr.name);
            }

            digest = efopen(options.outname, "w");
            if (!digest) exit(1);
            if (options.print_header) {
                fprintf(digest, "%% columns: \n");
                fprintf(digest, "%% query, target: structure names\n");
                fprintf(digest, "%% geom_z:  z score for the orientational match \n");
                fprintf(digest, "%% <dL>:    average length mismatch for matched SSEs \n");
                fprintf(digest, "%% T:       total score assigned to matched SSEs \n");
                fprintf(digest, "%% frac:    T divided by the number of matched SSEs \n");
                fprintf(digest, "%% GC_rmsd: RMSD btw geometric centers of matched SSEs (before postprocessing) \n");
                fprintf(digest, "%% A:       (after postprocessing) the alignment score \n");
                fprintf(digest, "%% aln_L:   (after postprocessing) the alignment length \n\n");
                fprintf(digest, "%% %6s%6s %6s %6s  %6s %6s %6s %6s %6s %6s \n",
                        "query ", "target ", "geom_z", "<dL>", "  T  ", "frac",
                        "GC_rmsd", "rmsd  ", "A  ", "aln_L  ");
            }
        }

        /* loop over the database: the targets are loaded into an array
           first so that the comparison can run as a parallel for loop */

        int tgt_counter = 0;
        int i;
        int *retval_array;
        Descr *tgt_descr_array;

        rewind(tgt_fptr);
        tgt_done = 0;

        /*
         * Counting the target entries (both usable and skipped ones)
         */
        while (!tgt_done) {
            retval = get_next_descr(tgt_fptr, &tgt_descr);
            if (retval == 0 || retval == 1) {
                tgt_counter++;
            } else if (retval == -1) {
                tgt_done = 1;
            }
        }

        /*
         * Allocation of the target array and of the matching read statuses.
         * A failed allocation is fatal: the arrays are dereferenced right
         * below.  calloc may legitimately return NULL for zero elements, so
         * an empty database is not treated as an error.
         */
        rewind(tgt_fptr);
        tgt_descr_array = calloc(tgt_counter, sizeof *tgt_descr_array);
        retval_array = calloc(tgt_counter, sizeof *retval_array);
        if (tgt_counter > 0 && (tgt_descr_array == NULL || retval_array == NULL)) {
            printf("malloc return NULL!\n");
            exit(1);
        }

        /*
         * Storing the targets and the statuses of reading them
         */
        for (i = 0; i < tgt_counter; ++i) {
            retval_array[i] = get_next_descr(tgt_fptr, &tgt_descr_array[i]);
        }

        rewind(tgt_fptr);
        db_ctr = 0;
        db_effective_ctr = 0;
        if (!user_defined_name) CPU_time_begin = clock();
        retval = -1;

        /* Start of parallelization: one thread when postprocessing, otherwise 6 */
        if (options.postprocess) omp_set_num_threads(1);
        else omp_set_num_threads(6);

        /* NOTE(review): compare() and print_score() are called from several
           threads at once and all threads write to the same digest stream;
           confirm that both are safe to use this way. */
        #pragma omp parallel
        {

            #pragma omp for
            for (i = 0; i < tgt_counter; ++i) {

                /* per-iteration state, private to the executing thread */
                int status = retval_array[i];
                /*
                 * Two scores: one for Smith Waterman, another for Hungarian
                 * in the database search phase.  score is zeroed because the
                 * "no SSE overlap" report below passes it to print_score()
                 * without compare() having filled it.
                 */
                Score score = {0};
                Score score_hung;
                Descr tgt = tgt_descr_array[i];

                #pragma omp atomic
                db_ctr++;

                if (status == 1) {
                    continue;
                }
                if (status == -1) {
                    printf("Error!!!!\n");
                    exit(1);
                }

                /* min number of elements */
                int helix_overlap =
                        (qry_descr.no_of_helices < tgt.no_of_helices) ?
                        qry_descr.no_of_helices : tgt.no_of_helices;
                int strand_overlap =
                        (qry_descr.no_of_strands < tgt.no_of_strands) ?
                        qry_descr.no_of_strands : tgt.no_of_strands;
                /* initialized: also passed to print_score() in the no-overlap report */
                double fraction_assigned = 0.0;
                int query_size = qry_descr.no_of_strands + qry_descr.no_of_helices;
                int target_size = tgt.no_of_strands + tgt.no_of_helices;

                if (helix_overlap + strand_overlap >= options.min_no_SSEs) {

                    Score *chosen = NULL;

                    #pragma omp atomic
                    db_effective_ctr++;

                    /* here is the core of the operation: */
                    status = compare(&tgt, &qry_descr, &score, &score_hung);
                    if (status) {
                        printf(" error comparing  db:%s   query:%s   \n",
                                tgt.name, qry_descr.name);
                        exit(status);
                    }

                    /*
                     * Output score. Can be based:
                     * - only on SW alignment during the database search
                     * - only on Hungarian algorithm during the database search
                     * - on whichever of the two has the larger total score
                     */
                    switch (options.score_out) {
                        case 0: /* SW */
                            chosen = &score;
                            break;
                        case 1: /* Hungarian */
                            chosen = &score_hung;
                            break;
                        case 2: /* either SW or Hungarian, depends on score */
                            chosen = (score.total_assigned_score > score_hung.total_assigned_score)
                                    ? &score : &score_hung;
                            break;
                        default: /* any other setting: nothing is printed */
                            break;
                    }

                    if (chosen) {
                        /* normalize by the size of the smaller structure */
                        int smaller_size = (query_size > target_size) ? target_size : query_size;
                        fraction_assigned = chosen->total_assigned_score / smaller_size;
                        status = print_score(digest, &qry_descr, &tgt, chosen, fraction_assigned, 1);
                    }

                    if (status) {
                        printf("error in printing to output file\n");
                        exit(status);
                    }

                } else if (options.report_no_sse_overlap) {
                    status = print_score(digest, &qry_descr, &tgt, &score, fraction_assigned, 0);
                    if (status) {
                        printf("error in printing to output file\n");
                        exit(status);
                    }
                }
            }

        }

        /* Memory cleaning */
        for (i = 0; i < tgt_counter; ++i) {
            descr_shutdown ( &tgt_descr_array[i] );
        }
        free(tgt_descr_array);
        free(retval_array);

        if (!user_defined_name && db_effective_ctr) {
            CPU_time_end = clock();
            fprintf(digest, "done   CPU:  %10.3lf s\n", (double) (CPU_time_end - CPU_time_begin) / CLOCKS_PER_SEC);
            fflush(digest);
        }

        if (!user_defined_name) {
            fclose(digest);
            digest = NULL;
        } /* otherwise we keep writing into the same digest file */

        if (options.postprocess) qry_done = 1; /* for now, we postprocess only
						one pair of structures (not structure against database) */

    }

    if (digest) {
        CPU_time_end = clock();
        fprintf(digest, "done   CPU:  %10.3lf s\n", (double) (CPU_time_end - CPU_time_begin) / CLOCKS_PER_SEC);
        fflush(digest);
    }
    if (options.verbose) {
        printf("\n\nlooked at %d db entries.\n",
                db_effective_ctr);
        printf("the output written to %s.\n\n", options.outname);
    }
    /**************************************************/
    /* housekeeping, good for tracking memory leaks   */
    if (digest) fclose(digest);
    descr_shutdown(&qry_descr);
    descr_shutdown(&tgt_descr);

    fclose(qry_fptr);
    fclose(tgt_fptr);

    return 0;

}
Beispiel #4
0
/**
 * Parse the command line (and the optional command file), open the target
 * and query input files, and report their types, chains and open streams
 * back to the caller.
 *
 * Settings are read from, and in places written to, the `options` object
 * declared outside this function.
 *
 * @param argc, argv          command line, forwarded to parse_cmd_line()
 * @param tgt_input_type_ptr  out: value returned by check_input_type() for
 *                            the target file (PDB or DB)
 * @param tgt_chain_ptr       out: target chain as set by parse_cmd_line()
 * @param tgt_descr           target descriptor; db_file and name may be set
 * @param tgt_fptr_ptr        out: open stream for the target file
 * @param qry_input_type_ptr  out: query file type, 0 if no query file is given
 * @param qry_chain_ptr       out: query chain as set by parse_cmd_line()
 * @param qry_descr           query descriptor; db_file and name may be set
 * @param qry_fptr_ptr        out: open stream for the query file, NULL if no
 *                            query file is given
 *
 * @return 0 on success, 1 on any failure. On failure the output arguments
 *         are left untouched and any file opened here is closed again.
 *         On success the caller owns the returned streams.
 */
int process_input_instructions (int argc, char *argv[],
				int * tgt_input_type_ptr, char * tgt_chain_ptr, Descr * tgt_descr, FILE ** tgt_fptr_ptr,
				int * qry_input_type_ptr, char * qry_chain_ptr, Descr * qry_descr, FILE ** qry_fptr_ptr) {


    char tgt_chain = '\0', qry_chain = '\0';
    int  tgt_input_type = 0, qry_input_type = 0;

    FILE *qry_fptr    = NULL, *tgt_fptr = NULL;


    char  *cmd_filename = NULL;

    int parse_cmd_line (int argc, char * argv[],  char * tgt_chain_ptr,
			char * qry_chain_ptr, char **cmd_filename_ptr);
    int read_cmd_file (char *filename);


    /* process cmd line input */
    if (parse_cmd_line (argc, argv, &tgt_chain,
		        &qry_chain, &cmd_filename)) return 1;

    /* process the command (parameters) file, if provided */
    if (cmd_filename && read_cmd_file(cmd_filename))  return 1;


    /* check if the tgt file is present and readable; open                */
    if ( ! (tgt_fptr = efopen(options.tgt_filename, "r"))) return 1;


    /* figure out whether we have a pdb or db input:                      */
    tgt_input_type = check_input_type (tgt_fptr);
    if ( tgt_input_type != PDB && tgt_input_type != DB ) {
	/* name the file that was actually opened; it need not be argv[1] */
	fprintf ( stderr, "Unrecognized file type: %s.\n", options.tgt_filename);
	goto failure;
    }

    /* for testing purposes we might have both: */
    if ( options.tgt_db) tgt_descr->db_file  =  options.tgt_db;
    if ( options.qry_db) qry_descr->db_file  =  options.qry_db;


    /* do something about the names for the output:                       */
    if ( tgt_input_type==PDB) {
	improvize_name (options.tgt_filename, tgt_chain, tgt_descr->name);
    }

    /**********************************************************************/
    /* the same for the qry file, but may not be necessary if we are      */
    /* preprocessing only                                                 */
    if ( options.qry_filename) {
	if ( ! (qry_fptr = efopen(options.qry_filename, "r"))) goto failure;

	qry_input_type = check_input_type (qry_fptr);
	if ( qry_input_type != PDB  &&  qry_input_type != DB ) {
	    /* argv[2] may not exist (or may not be the query file) */
	    fprintf ( stderr, "Unrecognized file type: %s.\n", options.qry_filename);
	    goto failure;
	}
	if ( qry_input_type==PDB) {
	    improvize_name (options.qry_filename, qry_chain, qry_descr->name);
	}
	/* postprocessing is switched off unless both inputs are PDB */
	if (tgt_input_type != PDB  ||  qry_input_type != PDB)  {
	    options.postprocess        = 0;
	    options.number_maps_out = 1;
	}

    }

    /* no output name given: build one from the two descriptor names */
    if( !options.outname[0]
	&& tgt_descr->name && tgt_descr->name[0]
	&& qry_descr->name && qry_descr->name[0]) {
	/* NOTE(review): unbounded write; confirm options.outname can hold
	   both names plus "_to_", or switch to a bounded snprintf */
	sprintf (options.outname, "%s_to_%s",  tgt_descr->name, qry_descr->name);
    }
    if ( options.outdir[0] ) {
	/* check whether this directory exists */
	struct stat st;
	if ( stat(options.outdir, &st) )  {
	    fprintf (stderr, "%s  not found.\n", options.outdir);
	    goto failure;
	}
    }

    if (!options.postprocess) {
	options.number_maps_out = 1;
	options.print_header    = 0;
    }

    /* everything checked out: hand the results over to the caller */
    *tgt_input_type_ptr = tgt_input_type;
    *qry_input_type_ptr = qry_input_type;

    *tgt_chain_ptr = tgt_chain;
    *qry_chain_ptr = qry_chain;

    *tgt_fptr_ptr = tgt_fptr;
    *qry_fptr_ptr = qry_fptr;

    return 0;

failure:
    /* the streams were never handed to the caller, so release them here */
    if (qry_fptr) fclose (qry_fptr);
    if (tgt_fptr) fclose (tgt_fptr);
    return 1;

}