コード例 #1
0
ファイル: postp.old.c プロジェクト: ivanamihalek/c-utils
int main ( int argc, char * argv[]) {

    Options options;
    Protein protein;
    Alignment * alignment = NULL;
    int retval;
    int almtctr1, almtctr2;
    double **score = NULL, corr,  pctg_gaps;
    double **clustering_score = NULL;
    double *area, *distance;
    int **rank_order= NULL,**res_rank=NULL,**int_cvg=NULL ;
    int ** correlated = NULL, **almt2prot = NULL, **prot2almt = NULL;
    /* command file is required */
    if ( argc < 2 ) {
	fprintf ( stderr, "Usage: %s <command file>.\n", argv[0]);
	exit (0);
    }
    retval = read_cmd_file ( argv[1], &options);
    if (retval) exit(retval);
    retval = logger (&options, INTRO, "");
    if (retval) exit(retval);
   

    /*******************************************/
    /*                                         */
    /*  PDB input                              */
    /*                                         */
    /*******************************************/
    if ( ! options.pdbname[0]) {
	fprintf (stderr, "%s cannot work without structure (cmd file was %s).\n",
		 argv[0], argv[1]);
	exit (1);
	
    } else {

	/* warn if no chain given */
	if ( !options.chain) {
	    retval = logger (&options, WARN, "No chain specified. Using the first one.");
	    if ( retval) exit (1);
	}
	if (retval) exit(retval);
	/* read in the structure */
	retval = read_pdb (options.pdbname, &protein, options.chain);
	if (retval) exit(retval);

   }

    
    /*******************************************/
    /*                                         */
    /*  alignment scoring                      */
    /*                                         */
    /*******************************************/
    if ( ! ( alignment = emalloc ( options.no_of_alignments*sizeof(Alignment)) )) return 1;
    if ( ! ( score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1;
    if ( ! ( rank_order = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( clustering_score = emalloc ( options.no_of_alignments*sizeof(double*)) )) return 1;
    if ( ! ( res_rank = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( int_cvg = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( almt2prot = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( prot2almt = emalloc ( options.no_of_alignments*sizeof(int*)) )) return 1;
    if ( ! ( area = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1;
    if ( ! ( distance = emalloc ( options.no_of_alignments*sizeof(double)) )) return 1;

    printf ( "\t%8s   %20s  %8s  %8s  %8s  \n", "almt#", "name        ",  "<dist to qry>", "%gaps", "area");
    
    for ( almtctr1 = 0; almtctr1 < options.no_of_alignments; almtctr1++) {

	/* read in the alignment */
	retval = read_clustalw (options.almtname[almtctr1], alignment + almtctr1);
	if (retval) exit(retval);
	/* pairwise distances btw the seqs */
	retval   = seq_pw_dist (alignment+almtctr1);
	if ( retval) return retval;
	/* average dist to the query in this alignment: */ 
	distance[almtctr1] = avg_dist_to_special (&options, alignment + almtctr1);
	/* percentage of gaps in the alignment: */
	pctg_gaps = (double) alignment->total_gaps/ ( (alignment+almtctr1)->length*(alignment+almtctr1)->number_of_seqs);
	/* make the residue scoring array */
	score[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(double));
	/* fill in the score array */ 
	scoring (&options,  alignment+almtctr1, score[almtctr1]);
	
	/* translate the scoring into rank order */
	rank_order[almtctr1] = emalloc ( alignment[almtctr1].length*sizeof(int));
	score2rank (score[almtctr1], rank_order[almtctr1], alignment[almtctr1].length);
	
	/* mapping between the protein and the alignment almtctr1 */
	if ( ! (almt2prot[almtctr1] = (int *) emalloc (alignment[almtctr1].length*sizeof(int))) )exit (1);
	if ( ! (prot2almt[almtctr1] = (int *) emalloc (protein.length*sizeof(int))) )exit (1);
	retval    = struct_almt_mapping (&protein, alignment+almtctr1, options.query,  prot2almt[almtctr1], almt2prot[almtctr1]);
	if (retval) exit(retval);
	
	/* find coverage info implied by the scoring array */
	if ( ! (res_rank[almtctr1] = (int*) emalloc (protein.length*sizeof(int))) ) exit (1);
	if ( ! (int_cvg[almtctr1] =  (int*) emalloc (protein.length*sizeof(int))) ) exit (1);
	coverage ( &protein, almt2prot[almtctr1], score[almtctr1], alignment[almtctr1].length,
		   res_rank[almtctr1], int_cvg[almtctr1] );
	/*clustering score*/
	clustering_score[almtctr1]  =  (double*) emalloc (protein.length*sizeof(double));
	if (!clustering_score[almtctr1]) exit(retval);
	clustering ( &protein,  res_rank[almtctr1], int_cvg[almtctr1], clustering_score[almtctr1]);
	/* cumulative clustering score*/
	area[almtctr1]  = area_over_coverage (int_cvg[almtctr1], clustering_score[almtctr1], protein.length);
					     
	printf ( "\t   %4d   %20s   %8.3lf     %8.3lf  %8.3lf \n",
		 almtctr1, options.almtname[almtctr1], distance[almtctr1], pctg_gaps, area[almtctr1]);
    }

    /* find the table of correlations */
    if ( ! (correlated = intmatrix ( options.no_of_alignments, options.no_of_alignments) ) ) return 1;
    for ( almtctr1 = 0; almtctr1 < options.no_of_alignments -1; almtctr1++) {
	correlated[almtctr1][almtctr1] = 1;
	for ( almtctr2 = almtctr1+1; almtctr2 < options.no_of_alignments; almtctr2++) {
	    if ( alignment[almtctr1].length != alignment[almtctr2].length  ) {
		fprintf ( stderr, "Error alignments in the files %s and %s ",
			  options.almtname[almtctr1], options.almtname[almtctr2]);
		fprintf ( stderr, "seem to be of unequal length: %d and %d.\n",
			  alignment[almtctr1].length ,  alignment[almtctr2].length);
		return 1;
	    }
	    corr = spearman ( rank_order[almtctr1], rank_order[almtctr2], alignment[almtctr1].length );
	    printf ( " %3d  %3d  %8.4lf\n", almtctr1, almtctr2, corr);
	    correlated[almtctr1][almtctr2] = ( corr > 0.9 );
	}
    }

    
    /* find corelated clusters (of sequence selections)*/
    {
	int  *cluster_count_per_size;
	int  no_of_clusters;
	int  max_size, secnd_max_size , ** cluster;
	int size = options.no_of_alignments;
	int i,j;
	double dist, ar, max_area, dist_at_max_area;
	double min_dist_at_max_area, min_dist, max_area_at_min_dist;
	int almt_no, min_dist_almt;
	int cluster_counter (int  no_of_things,  int *neighbors[],
			      int cluster_count_per_size[], int * no_of_clusters,
			      int * max_size, int * secnd_max_size , int * cluster[]);
	
	
	if ( ! ( cluster_count_per_size = emalloc (size*sizeof(int)))) return 1; 
	if ( ! (cluster = intmatrix ( size+1, size+1) ) ) return 1;
	retval = cluster_counter (size,  correlated,  cluster_count_per_size,  &no_of_clusters,
			 & max_size,  &secnd_max_size , cluster);
	if ( retval ) return 1;

	printf ( "number of clusters: %d \n", no_of_clusters);
	for (i=0; i<=size; i++ ) {
	    if ( ! cluster[i][0] ) continue;
	    if ( !i ) {
		printf ( "\t isolated:\n");
	    } else {
		printf ("\t cluster size: %3d \n", cluster[i][0]); 
	    }
	    for (j=1; j <= cluster[i][0]; j++ ) {
		printf ( "%3d ", cluster[i][j] );
	    }
	    printf ( "\n");
	}

	
	/* which cluster is the closest to the singled out sequence ("special") */
	min_dist_at_max_area = dist_at_max_area = 10;
	max_area_at_min_dist = min_dist = -10;
	min_dist_almt = -1;
	for (i=0; i<=size; i++ ) {
	    if ( ! cluster[i][0] ) continue;
	    
	    max_area = -100;
	    almt_no =  dist_at_max_area = -1;
	    
	    for (j=1; j <= cluster[i][0]; j++ ) {
		dist = distance[cluster[i][j]] ;
		ar =  area[cluster[i][j]] ;
		if ( max_area < ar ) {
		    max_area = ar;
		    dist_at_max_area = dist;
		    almt_no = cluster[i][j];
		}
	    }
	    if ( almt_no < 0 ) {
		fprintf ( stderr, "Error selecting the alignment (1)\n");
		exit (1);
	    }
	    
	    if ( min_dist_at_max_area > dist_at_max_area ) {
		min_dist = dist_at_max_area;
		max_area_at_min_dist = max_area;
		min_dist_almt = almt_no;
	    }
	}
	if ( min_dist_almt < 0 ) {
	    fprintf ( stderr, "Error selecting the alignment (2)\n");
	    exit (1);
	}
	
	printf ( "choosing alignment %d %s (distance: %5.3f  area: %6.3f)\n",
		min_dist_almt, options.almtname[min_dist_almt],  min_dist, max_area_at_min_dist);
	
	
	free (cluster_count_per_size);
	free_matrix ( (void **) cluster);
    }
    free (score);

    logger ( &options, NOTE, "");
    return 0;
}
コード例 #2
0
ファイル: main.c プロジェクト: ivanamihalek/c-utils
/*
 * Entry point: cluster a selection of residues on a structure.
 *
 * Command line: <pdbfile> <chain id>|'-' <selection file> <cutoff dist>
 *
 * Reads the structure and the selection, builds the residue-residue distance
 * matrix, marks two residues as neighbors when their distance is below the
 * cutoff, and passes the neighbor matrix and the selection to
 * cluster_counter() and clustering_z_score(). The clusters and the z-score
 * are written to stdout.
 *
 * Returns 0 on success; exits with status 1 on bad arguments, input errors
 * or allocation failure.
 */
int main ( int argc, char * argv[]) {

    char pdbname[150] = {'\0'};
    char selection_file[150] = {'\0'};
    Residue * sequence;
    int no_res, res_ctr;
    int * selected;
    double cutoff_dist, score;
    double **distmat;
    int ** cluster;
    int c;
    char chain_id = '\0';
    char * cutoff_end = NULL;
    FILE * fclust = NULL;


    if ( argc < 5 ) {
      	fprintf (stderr,
      		 "Usage: %s <pdbfile>  <chain id>|'-'  <selection file>  <cutoff dist> \n"
           "where selection file is of the format (for example) \n"
           "F    3 \n"
           "K   48 \n"
           "R   50 \n"
           "D   59 \n"
           "E   82 \n"
           "G   91 \n"
           "A   97 \n"
           "C  100 \n"
           "etc \n"
           "and the cutoff distance is given in Angstroms\n",
      		 argv[0]);
      	exit (1);
    }

    /* The file names come straight from the command line, so copy them with
       a bound and refuse names that do not fit instead of overflowing. */
    if ( snprintf ( pdbname, sizeof(pdbname), "%s", argv[1]) >= (int) sizeof(pdbname) ) {
        fprintf (stderr, "pdb file name too long (max %d characters): %s\n",
                 (int) sizeof(pdbname) - 1, argv[1]);
        exit (1);
    }
    /* '-' on the command line stands for "no chain id" */
    chain_id =  argv[2][0]=='-' ? '\0' : argv[2][0];
    if ( snprintf ( selection_file, sizeof(selection_file), "%s", argv[3])
         >= (int) sizeof(selection_file) ) {
        fprintf (stderr, "selection file name too long (max %d characters): %s\n",
                 (int) sizeof(selection_file) - 1, argv[3]);
        exit (1);
    }
    /* strtod lets us tell "no number given" apart from a genuine 0 */
    cutoff_dist = strtod ( argv[4], &cutoff_end);
    if ( cutoff_end == argv[4] ) {
        fprintf (stderr, "could not read the cutoff distance from \"%s\"\n", argv[4]);
        exit (1);
    }
    printf ("cutoff distance: %5.2lf Angstroms\n", cutoff_dist);

    /* input the structure */
    if ( read_pdb ( pdbname, &chain_id, &sequence, &no_res) ) exit (1);
    printf ("there are %d residues in %s, chain %c.\n",  no_res, pdbname, chain_id);

    /* input selection */
    selected = emalloc (no_res*sizeof(int));
    if (! selected) {
      	fprintf (stderr, "error allocating selection array\n");
      	exit(1); // leaky, leaky
    }
    if ( read_selection (sequence, no_res, selection_file, selected) ) exit (1);

    /* allocate space for dist matrix */
    distmat = (double**) dmatrix (0,  no_res-1, 0,  no_res-1);
    if ( ! distmat ) {
        fprintf (stderr, "error allocating distance matrix\n");
        exit (1);
    }
    /* calculate dist */
    if ( determine_dist_matrix(distmat,  sequence, no_res) ) exit(1);

    /* cluster counting ... */
    int  no_of_clusters, max_size, secnd_max_size;
    int * cluster_count;
    int ** neighbors;
    int res1, res2;

    /* cluster and cluster_count get one slot more than there are residues;
       the output loop below reads cluster rows 0 .. no_res */
    cluster_count       =  (int *) emalloc ( (no_res+1)*sizeof(int));
    cluster             =  imatrix (0, no_res, 0, no_res);
    neighbors           =  imatrix (0, no_res-1, 0, no_res-1);
    if ( ! cluster_count || ! cluster || ! neighbors ) {
        fprintf (stderr, "error allocating cluster bookkeeping arrays\n");
        exit (1);
    }

    /* symmetric neighbor matrix: each residue neighbors itself, and two
       residues are neighbors when closer than the cutoff */
    for (res1=0;  res1 < no_res; res1++ ) {
        neighbors [res1][res1] = 1;
        for (res2= res1+1; res2 < no_res; res2++ ) {
            neighbors[res1][res2] = ( distmat[res1][res2] < cutoff_dist);
            neighbors[res2][res1] = neighbors[res1][res2];
        }
    }

    cluster_counter (no_res,  neighbors,  selected, cluster_count, & no_of_clusters,
                     &max_size, &secnd_max_size , cluster);
    clustering_z_score ( no_res,  neighbors,  selected, &score);

    /* Disabled experiment: cluster score of the actual selection compared
       with 100 random selections of the same expected size. It works on its
       own score variable so that enabling it cannot clobber the z-score
       printed at the end; note that it does overwrite `selected`. */
    if (0) {
        printf ("runnning simulation ...\n");
        int cluster_score (int no_of_res, int *seq, int ** adj_matrix,double *score);
        int i, n;
        int no_selected = 0;
        double sim_score = 0;
        for (i=0; i<no_res; i++) no_selected += selected[i];
        cluster_score (no_res, selected, neighbors, &sim_score);
        printf (" selected  %4d   original cluster score = %8.2e\n", no_selected, sim_score);
        double frac = (double)no_selected/no_res;
        for (n=0; n<100; n++) { // number of reps
            memset (selected, 0, no_res*sizeof(int));
            for (i=0; i<no_res; i++) {
                if (drand48() < frac) selected[i] = 1;
            }
            cluster_score (no_res, selected, neighbors, &sim_score);
            no_selected = 0;
            for (i=0; i<no_res; i++) no_selected += selected[i];
            printf  (" selected  %4d    random score = %8.2e\n",  no_selected, sim_score);
        }
    }

    /* output */
    /* As read here: cluster[c][0] is the number of members in row c,
       cluster[c][1..] are residue indices; row 0 is reported as isolated. */
    fclust = stdout;
    for ( c=0; c <= no_res; c++) {
      	if ( ! cluster[c][0] ) {
      	    continue;
      	}
      	if ( !c ) {
      	    fprintf ( fclust,"\t isolated:\n");
      	} else {
      	    fprintf ( fclust,"\t cluster size: %3d \n", cluster[c][0]);
      	}
      	for ( res_ctr=1; res_ctr <=  cluster[c][0]; res_ctr++) {
      	    fprintf ( fclust, "%s  \n", sequence[ cluster[c][res_ctr] ].pdb_id );
      	}
    }
    fprintf (fclust, "\nclustering  z-score:  %8.3f\n", score);



    return 0;



}
コード例 #3
0
ファイル: rw-task-fair.cpp プロジェクト: mohamed/schedcat
// Accumulates the interference that requests of other tasks can impose on
// `tsk` within its response time.
//
// Walks `all_reqs` in the given order (presumed sorted, covering all
// clusters/tasks) and, for each request issued by a task other than `tsk`,
// counts as many instances as every applicable budget still allows:
//   - max_requests         per issuing task,
//   - max_remote_requests  per cluster other than tsk's own,
//   - max_local_requests   for tsk's own cluster,
//   - max_total            overall; the walk stops once it is used up.
//
// Returns the summed request count and total length.
static Interference bound_blocking_all(
	const TaskInfo* tsk,
	const ContentionSet& all_reqs, // presumed sorted, for all clusters/tasks
	const unsigned int max_remote_requests, // per cluster
	const unsigned int max_local_requests,  // local cluster
	const unsigned int max_requests,        // per task
	unsigned int max_total)                 // stop after counting max_total
{
	typedef hashmap<unsigned long, unsigned int> TaskBudgets;
	typedef hashmap<unsigned int, unsigned int>  ClusterBudgets;

	const unsigned long window = tsk->get_response();
	TaskBudgets    per_task(512);
	ClusterBudgets per_cluster(64);
	Interference   inter;

	// the local cluster starts out with its own, separate budget
	per_cluster[tsk->get_cluster()] = max_local_requests;

	foreach(all_reqs, it)
	{
		const RequestBound* req   = *it;
		const TaskInfo*     owner = req->get_task();
		const unsigned long owner_key  = (unsigned long) owner;
		const unsigned int  cluster_id = owner->get_cluster();

		// overall budget exhausted: nothing more can be counted
		if (max_total == 0)
			break;

		// a task's own requests never block it
		if (owner == tsk)
			continue;

		// first request seen from this task: give it a full budget
		if (per_task.find(owner_key) == per_task.end())
			per_task[owner_key] = max_requests;
		TaskBudgets::iterator task_left = per_task.find(owner_key);

		if (task_left->second == 0)
			continue;

		// first request seen from this cluster: it must be a remote one,
		// since the local cluster was entered before the loop
		if (per_cluster.find(cluster_id) == per_cluster.end())
			per_cluster[cluster_id] = max_remote_requests;
		ClusterBudgets::iterator cluster_left = per_cluster.find(cluster_id);

		if (cluster_left->second == 0)
			continue;

		// the tightest of the three budgets caps this request
		const unsigned int allowed =
			std::min(std::min(task_left->second, cluster_left->second),
				 max_total);
		const unsigned int num =
			std::min(req->get_max_num_requests(window), allowed);

		inter.total_length += num * req->get_request_length();
		inter.count        += num;

		// charge the counted instances against every budget
		cluster_left->second -= num;
		task_left->second    -= num;
		max_total            -= num;
	}

	return inter;
}