Пример #1
0
int main(int argc,char **argv)
{
	int i,nseqs;
	char infile[FILENAMELEN+1];
	char outfile[FILENAMELEN+1];
	ALN mult_aln;
	OPT opt;

	if(argc!=3) {
		fprintf(stderr,"Usage: %s input_aln output_aln\n",argv[0]);
		exit(1);
	}
	strcpy(infile,argv[1]);
	strcpy(outfile,argv[2]);

        init_options(&opt);

	(*opt.alnout_opt).output_clustal=FALSE;
	(*opt.alnout_opt).output_gcg=TRUE;

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		fprintf(stderr,"ERROR: No sequences in %s\n",infile);
		exit(1);
	}
	nseqs=mult_aln.nseqs;

/* write out the sequences */
	strcpy((*opt.alnout_opt).gcg_outname, outfile);
	for (i=0;i<mult_aln.nseqs;i++) mult_aln.seqs[i].output_index = i;

	if(!open_alignment_output(infile,opt.alnout_opt)) exit(1);
        create_alignment_output(mult_aln,*opt.alnout_opt);
}
Пример #2
0
int main(int argc,char **argv)
{
	sint i,j,k,n,s;
	sint status;
	sint result_type;
	char c;
	char infile[FILENAMELEN+1];
	char treefile[FILENAMELEN+1];
	float meanid;
	FILE *tree;
	sint maxres,*gapptr=NULL;
	IN_TREEPTR itree;
	double dscore;
	double meanscore;
	double **tmat;
	sint window;
	sint block_cutoff;
	OPT opt;

	if(argc!=2) {
		fprintf(stdout,"Usage: %s input_aln\n",argv[0]);
		exit(1);
	}

	strcpy(infile,argv[1]);

        init_options(&opt);

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		error("No sequences in %s\n",infile);
		exit(1);
	}


	window=8;

/* count pairwise residue percent identities */
        tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) );
        for(i=0;i<mult_aln.nseqs;i++)
                tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) );

	meanscore=0;
        for (i=0,n=0;i<mult_aln.nseqs;i++) {
                for (j=i+1;j<mult_aln.nseqs;j++) {
                        dscore = countid(mult_aln.seqs[i],mult_aln.seqs[j]);
                        tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0;
			n++;
			meanscore+=dscore;
                }
        }
	meanscore/=(float)n;

	/*if(mult_aln.nseqs<100) block_cutoff=8;
	else if(mult_aln.nseqs<250) block_cutoff=6;
	else*/ block_cutoff=5;

	if(meanscore>50) block_cutoff=50;

/* make a tree from the percent identities (used for sequence weighting) */
	strcpy(treefile,infile);
	strcat(treefile,".ph");
        if((tree = open_explicit_file(treefile))==NULL) exit(1);

        guide_tree(tree,mult_aln.seqs,mult_aln.nseqs, tmat, QUICKNJ);
        itree=(IN_TREEPTR)ckalloc(sizeof(IN_TREE));

        status = read_tree(treefile, mult_aln.seqs, 0, mult_aln.nseqs,itree);
        for(i=0;i<mult_aln.nseqs;i++)
                ckfree(tmat[i]);
        ckfree(tmat);

        if (status < 0) exit(1);


        seq_weight = calc_seq_weights(0,mult_aln.nseqs,itree,FALSE);
        free_tree(itree);
        remove(treefile);


/* find the start and end positions of each sequence */

	is = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	ie = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	for(s=0;s<mult_aln.nseqs;s++) {
		is[s]=0;
                ie[s] = mult_aln.seqs[s].len;
                for (i=0; i<mult_aln.seqs[s].len; i++) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                is[s]++;
                        else
                                break;
                }
                for (i=mult_aln.seqs[s].len-1; i>=0; i--) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                ie[s]--;
                        else
                                break;
                }
	}

	matrix.format=0;
	maxres = get_cl_matrix(FALSE, gon250mt, gapptr, TRUE, 100, &matrix);

	all_blocks(infile,window,block_cutoff);
}
Пример #3
0
void main_menu(void)
{
        int catchint;

        catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
        if (catchint) {
                if (setjmp(jmpbuf) != 0)
                        fprintf(stdout,"\n.. Interrupt\n");
#ifdef UNIX
                if (signal(SIGINT,jumper) == BADSIG)
                        fprintf(stdout,"Error: signal\n");
#else
                if (signal(SIGINT,SIG_DFL) == (void*)BADSIG)
                        fprintf(stdout,"Error: signal\n");
#endif
        }

	while(TRUE) {
		fprintf(stdout,"\n\n\n");
		fprintf(stdout," **************************************************************\n");
		fprintf(stdout," ******** CLUSTAL %s Multiple Sequence Alignments  ********\n",revision_level);
		fprintf(stdout," **************************************************************\n");
		fprintf(stdout,"\n\n");
		
		fprintf(stdout,"     1. Sequence Input From Disc\n");
		fprintf(stdout,"     2. Multiple Alignments\n");
		fprintf(stdout,"     3. Profile / Structure Alignments\n");
		fprintf(stdout,"     4. Phylogenetic trees\n");
		fprintf(stdout,"\n");
		fprintf(stdout,"     S. Execute a system command\n");
		fprintf(stdout,"     H. HELP\n");
		fprintf(stdout,"     X. EXIT (leave program)\n\n\n");
		
		getstr("Your choice",lin1);

		switch(toupper(*lin1)) {
			case '1': seq_input(FALSE);
				phylip_name[0]=EOS;
				clustal_name[0]=EOS;
				dist_name[0]=EOS;
				nexus_name[0]=EOS;
				break;
			case '2': multiple_align_menu();
				break;
			case '3': profile_align_menu();
				break;
			case '4': phylogenetic_tree_menu();
				break;
			case 'S': do_system();
				break;
			case '?':
			case 'H': get_help('1');
				break;
			case 'Q':
			case 'X': exit(0);
				break;
			default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
				break;
		}
	}
}
Пример #4
0
static void phylogenetic_tree_menu(void)
{
        int catchint;

        catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
        if (catchint) {
                if (setjmp(jmpbuf) != 0)
                        fprintf(stdout,"\n.. Interrupt\n");
#ifdef UNIX
                if (signal(SIGINT,jumper) == BADSIG)
                        fprintf(stdout,"Error: signal\n");
#else
                if (signal(SIGINT,SIG_DFL) == (void*)BADSIG)
                        fprintf(stdout,"Error: signal\n");
#endif
        }


    while(TRUE)
    {
        fprintf(stdout,"\n\n\n");
        fprintf(stdout,"****** PHYLOGENETIC TREE MENU ******\n");
        fprintf(stdout,"\n\n");

        fprintf(stdout,"    1.  Input an alignment\n");
        fprintf(stdout,"    2.  Exclude positions with gaps?        ");
	if(tossgaps)
		fprintf(stdout,"= ON\n");
	else
		fprintf(stdout,"= OFF\n");
        fprintf(stdout,"    3.  Correct for multiple substitutions? ");
	if(kimura)
		fprintf(stdout,"= ON\n");
	else
		fprintf(stdout,"= OFF\n");
        fprintf(stdout,"    4.  Draw tree now\n");
        fprintf(stdout,"    5.  Bootstrap tree\n");
	fprintf(stdout,"    6.  Output format options\n");
        fprintf(stdout,"\n");
        fprintf(stdout,"    S.  Execute a system command\n");
        fprintf(stdout,"    H.  HELP\n");
        fprintf(stdout,"    or press [RETURN] to go back to main menu\n\n\n");

        getstr("Your choice",lin1);
        if(*lin1 == EOS) return;

        switch(toupper(*lin1))
        {
       	 	case '1': seq_input(FALSE);
				phylip_name[0]=EOS;
				clustal_name[0]=EOS;
				dist_name[0]=EOS;
				nexus_name[0]=EOS;
         	   	break;
        	case '2': tossgaps ^= TRUE;
          	  	break;
      		case '3': kimura ^= TRUE;;
            		break;
        	case '4': phylogenetic_tree(phylip_name,clustal_name,dist_name,nexus_name,"amenu.pim");
            		break;
        	case '5': bootstrap_tree(phylip_name,clustal_name,nexus_name);
            		break;
		case '6': tree_format_options_menu();
			break;
        	case 'S': do_system();
            		break;
            	case '?':
        	case 'H': get_help('7');
            		break;
            	case 'Q':
        	case 'X': return;

        	default: fprintf(stdout,"\n\nUnrecognised Command\n\n");
            	break;
        }
    }
}
Пример #5
0
int main(int argc, char **argv)
{
	FILE *ofd,*ifd;
        ALN mult_aln;
        OPT opt;
	char infile[FILENAMELEN+1];
	char outfile[FILENAMELEN+1];
	int nseqs;
	int  i,j,l,n,ires;
	int err,ix,ntot;
	float min_nn,nn;
	float tmp;
	Boolean eof,found;

	if(argc!=3 && argc!=7 && argc!=8) {
		usage(argv[0]);
		return 0;
	}

	strcpy(infile,argv[1]);
	strcpy(outfile,argv[2]);

/* open the matrix file */
	verbose=FALSE;

	if(argc==3) {
		get_default_matrix();
		go=0.0;
		ge=0.1;
		egap=0.0;
	}
	else {
		if(argc==8) verbose=TRUE;
	

        	if((ifd=fopen(argv[3],"r"))==NULL) {
            	fprintf(stderr,"Cannot open matrix file [%s]",argv[3]);
            	return 0;
        	}
		err=readmatrix(ifd);
		if(err<=0) {
			fprintf(stderr,"Error: bad matrix in %s\n",argv[3]);
			return 0;
		}

		go=atof(argv[4]);
		ge=atof(argv[5]);
		egap=atof(argv[6]);
	}

        init_options(&opt);

        (*opt.alnout_opt).output_clustal=FALSE;
        (*opt.alnout_opt).output_relacs=TRUE;

/* read in the sequences */
        seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
        if(mult_aln.nseqs<=0) {
                error("No sequences in %s\n",infile);
                exit(1);
        }
        nseqs=mult_aln.nseqs;

/* remove the gaps */
	seqlength=0;
	useqlen_array=(int *)ckalloc((nseqs+1)*sizeof(int));
	for(i=0;i<nseqs;i++) {
		if(mult_aln.seqs[i].len>seqlength) seqlength=mult_aln.seqs[i].len;
		l=0;
		for(j=0;j<mult_aln.seqs[i].len;j++)
			if(isalpha(mult_aln.seqs[i].data[j])) {
				l++;
			}
		useqlen_array[i]=l;
	}
        maxlen=0;
        for(i=0;i<nseqs;i++)
                if(useqlen_array[i]>maxlen) maxlen=useqlen_array[i];
        minlen=10000;
        for(i=0;i<nseqs;i++)
                if(useqlen_array[i]<minlen) minlen=useqlen_array[i];
	
/* remove any column score data that already exists in the input file */
	/*if (mult_aln.ncol_scores==1)
		ckfree(mult_aln.col_score[0].data);*/
	ix=mult_aln.ncol_scores;
	mult_aln.col_score[ix].data=(sint *)ckalloc((seqlength+1)*sizeof(sint));
	mult_aln.ncol_scores=ix+1;

/* calculate some simple statistics */
        pcid=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                pcid[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0;i<nseqs;i++) {
		for(j=i+1;j<nseqs;j++) {
			pcid[j][i]=pcid[i][j]=pcidentity(mult_aln,i,j);
		}
	}

/* find the nearest neighbor for each sequence */
	min_nn=1.0;
	for(i=0;i<nseqs;i++) {
		nn=0.0;
		for(j=0;j<nseqs;j++) {
			if(i!=j && pcid[i][j]>nn) nn=pcid[i][j];
		}
		if(nn<min_nn) min_nn=nn;
	}

        seqweight=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                seqweight[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0;i<nseqs;i++) 
		for(j=i;j<nseqs;j++) {
			seqweight[j][i]=seqweight[i][j]=1.0-pcid[i][j];
		}


	fragment=(Boolean *)ckalloc((nseqs+1)*sizeof(Boolean));


/* calculate pairwise alignment scores using k-tuple scores */
        qpw_id=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                qpw_id[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0,n=0;i<nseqs;i++)
                for(j=i+1;j<nseqs;j++) {
			qpw_id[i][j]=100.0*pcid[i][j];
                        if(qpw_id[i][j]<60) {
				qpw_id[i][j]=show_pair(mult_aln,i,j);
			}
			if(qpw_id[i][j]>40) {
				tmp=(float)useqlen_array[i]/(float)useqlen_array[j];
				if(tmp<0.8) fragment[i]=TRUE;
				else if(tmp>1.25) fragment[j]=TRUE;
			}
			n++;
		}

	/*if(verbose)
        for(i=0;i<nseqs;i++)
		if(fragment[i]) fprintf(stdout,"%s fragment %s\n",argv[1],names[i]);*/

/* calculate sequence groups and keep first sequence in each group for processing */
        use_seq=(int *)ckalloc((nseqs+1)*sizeof(int));
	for(i=0;i<nseqs;i++)
		use_seq[i]=2;

	query=0;
	seqgroup=(int *)ckalloc((nseqs+1)*sizeof(int));
	groupseed=(int *)ckalloc((nseqs+1)*sizeof(int));
        calc_groups(query,0.7,nseqs,pcid,seqgroup,groupseed);

	if(ngroups<=0) {
		fprintf(stderr,"Error: problem with sequence grouping\n");
		exit(1);
	}
        for(j=0;j<nseqs;j++) use_seq[j]=(-1);
        for(i=0;i<ngroups;i++) {
		j=groupseed[i];
		use_seq[j]=2;
        }


        for(i=0;i<nseqs;i++)
                ckfree(pcid[i]);
        ckfree(pcid);
	ckfree(groupseed);
	ckfree(seqgroup);


	qpw=(float *)ckalloc((ngroups*ngroups+1)*sizeof(float));

        for(i=0,n=0;i<nseqs;i++)
		if(use_seq[i]>1) 
                for(j=i+1;j<nseqs;j++)
			if(use_seq[j]>1)
                        qpw[n++]=qpw_id[i][j];
        for(i=0;i<nseqs;i++)
                ckfree(qpw_id[i]);
        ckfree(qpw_id);

/* sort the pairwise k-tuple scores into ascending order */
        sort_scores(qpw,0,n-1);

/* calculate the scores for the gaps */
        gop=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                gop[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        gep=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                gep[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        egp=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                egp[i]=(float *)ckalloc((nseqs+1)*sizeof(float));

       	for(i=0;i<nseqs;i++) 
		if(use_seq[i]>1)
              	for(j=i+1;j<nseqs;j++)
			if(use_seq[j]>1)
			score_gaps(mult_aln,i,j);

	calc_md(&mult_aln,ix);
	for(i=0,ntot=0;i<nseqs;i++)
		if(use_seq[i]>1) {
			ntot++;
		}
	tmp=set_mdcutoff(ntot,q1);
	normd_rs/=tmp;
	tmp=1.0;
	norm_md/=tmp;

	mult_aln.alnscore=norm_md;
	mult_aln.validalnscore=TRUE;

	if(!verbose) {
        	fprintf(stdout,"%s\t%.3f\n",argv[1],norm_md);
        	/*fprintf(stdout,"%.3f\n",norm_md);*/
	} else {
		/*fprintf(stdout,"%s %.3f %.3f %.3f %.3f %.3f %.3f %d %d %d\n",
		argv[1],norm_md,normd_rs,col,max_colscore,gap_extscore*0.1,q1,nseqs,minlen,maxlen);*/
		fprintf(stdout,"FILE  %s\n",argv[1]);
        	fprintf(stdout,"norMD    %.3f\n",norm_md);
        	/*fprintf(stdout,"norMD_of %.3f\n",norm_md);
        	fprintf(stdout,"norMD_rs %.3f\n",normd_rs);*/
        	fprintf(stdout,"NSEQS    %d\n",nseqs);
        	fprintf(stdout,"MD       %.3f\n",col);
        	fprintf(stdout,"maxMD    %.3f\n",max_colscore);
        	fprintf(stdout,"GOP      %.3f\n",gap_openscore);
        	fprintf(stdout,"GEP      %.3f\n",gap_extscore);
        	fprintf(stdout,"LQR      %.3f\n",q1);
	}

        for(i=0;i<nseqs;i++)
                ckfree(gop[i]);
        ckfree(gop);
        for(i=0;i<nseqs;i++)
                ckfree(gep[i]);
        ckfree(gep);
        for(i=0;i<nseqs;i++)
                ckfree(egp[i]);
        ckfree(egp);

        for(i=0;i<nseqs;i++)
                ckfree(seqweight[i]);
        ckfree(seqweight);
	ckfree(qpw);
	ckfree(use_seq);
	ckfree(fragment);
	ckfree(useqlen_array);

/* write out the sequences */
	strcpy(opt.alnout_opt->relacs_outname,outfile);
        if(!open_alignment_output(outfile,opt.alnout_opt)) exit(1);
        create_alignment_output(mult_aln,*opt.alnout_opt);


	return 0;
}