Exemple #1
0
sint palign1(void)  /* a profile alignment */
{
   sint 		i,j,temp;
   sint 		entries;
   sint 		*aligned, *group;
   float        dscore;
   lint			score;

   info("Start of Initial Alignment");

/* calculate sequence weights according to branch lengths of the tree -
   weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */

   temp = INT_SCALE_FACTOR/nseqs;
   for (i=0;i<nseqs;i++) seq_weight[i] = temp;

   distance_tree = FALSE;

/* do the initial alignment.........  */

   group = (sint *)ckalloc( (nseqs+1) * sizeof (sint));

   for(i=1; i<=profile1_nseqs; ++i)
         group[i] = 1;
   for(i=profile1_nseqs+1; i<=nseqs; ++i)
         group[i] = 2;
   entries = nseqs;

   aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) );
   for (i=1;i<=nseqs;i++) aligned[i] = 1;

   score = prfalign(group, aligned);
   info("Sequences:%d      Score:%d",(pint)entries,(pint)score);
   group=ckfree((void *)group);
   aligned=ckfree((void *)aligned);

   for (i=1;i<=nseqs;i++) {
     for (j=i+1;j<=nseqs;j++) {
       dscore = countid(i,j);
       tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;
       tmat[j][i] = tmat[i][j];
     }
   }

   return(nseqs);
}
Exemple #2
0
int main(int argc,char **argv)
{
	sint i,j,k,n,s;
	sint status;
	sint result_type;
	char c;
	char infile[FILENAMELEN+1];
	char treefile[FILENAMELEN+1];
	float meanid;
	FILE *tree;
	sint maxres,*gapptr=NULL;
	IN_TREEPTR itree;
	double dscore;
	double meanscore;
	double **tmat;
	sint window;
	sint block_cutoff;
	OPT opt;

	if(argc!=2) {
		fprintf(stdout,"Usage: %s input_aln\n",argv[0]);
		exit(1);
	}

	strcpy(infile,argv[1]);

        init_options(&opt);

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		error("No sequences in %s\n",infile);
		exit(1);
	}


	window=8;

/* count pairwise residue percent identities */
        tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) );
        for(i=0;i<mult_aln.nseqs;i++)
                tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) );

	meanscore=0;
        for (i=0,n=0;i<mult_aln.nseqs;i++) {
                for (j=i+1;j<mult_aln.nseqs;j++) {
                        dscore = countid(mult_aln.seqs[i],mult_aln.seqs[j]);
                        tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0;
			n++;
			meanscore+=dscore;
                }
        }
	meanscore/=(float)n;

	/*if(mult_aln.nseqs<100) block_cutoff=8;
	else if(mult_aln.nseqs<250) block_cutoff=6;
	else*/ block_cutoff=5;

	if(meanscore>50) block_cutoff=50;

/* make a tree from the percent identities (used for sequence weighting) */
	strcpy(treefile,infile);
	strcat(treefile,".ph");
        if((tree = open_explicit_file(treefile))==NULL) exit(1);

        guide_tree(tree,mult_aln.seqs,mult_aln.nseqs, tmat, QUICKNJ);
        itree=(IN_TREEPTR)ckalloc(sizeof(IN_TREE));

        status = read_tree(treefile, mult_aln.seqs, 0, mult_aln.nseqs,itree);
        for(i=0;i<mult_aln.nseqs;i++)
                ckfree(tmat[i]);
        ckfree(tmat);

        if (status < 0) exit(1);


        seq_weight = calc_seq_weights(0,mult_aln.nseqs,itree,FALSE);
        free_tree(itree);
        remove(treefile);


/* find the start and end positions of each sequence */

	is = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	ie = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	for(s=0;s<mult_aln.nseqs;s++) {
		is[s]=0;
                ie[s] = mult_aln.seqs[s].len;
                for (i=0; i<mult_aln.seqs[s].len; i++) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                is[s]++;
                        else
                                break;
                }
                for (i=mult_aln.seqs[s].len-1; i>=0; i--) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                ie[s]--;
                        else
                                break;
                }
	}

	matrix.format=0;
	maxres = get_cl_matrix(FALSE, gon250mt, gapptr, TRUE, 100, &matrix);

	all_blocks(infile,window,block_cutoff);
}