Exemple #1
0
/*
 * Read an alignment file and write it back out with the GCG output option
 * enabled (CLUSTAL output is switched off).
 *
 * Usage: prog input_aln output_aln
 *
 * argv[1] is passed to seq_input() as the file to read; argv[2] is stored as
 * the GCG output file name.  The process exits with status 1 on a usage
 * error, an over-long file name, an input with no sequences, or when
 * open_alignment_output() reports failure.  Returns 0 otherwise.
 */
int main(int argc,char **argv)
{
	int i;
	char infile[FILENAMELEN+1];
	char outfile[FILENAMELEN+1];
	ALN mult_aln;
	OPT opt;

	if(argc!=3) {
		fprintf(stderr,"Usage: %s input_aln output_aln\n",argv[0]);
		exit(1);
	}

	/* The names are copied into fixed-size buffers below, so refuse
	   anything that would not fit instead of overflowing the stack. */
	if(strlen(argv[1])>FILENAMELEN || strlen(argv[2])>FILENAMELEN) {
		fprintf(stderr,"ERROR: file name too long (max %d characters)\n",
			(int)FILENAMELEN);
		exit(1);
	}
	strcpy(infile,argv[1]);
	strcpy(outfile,argv[2]);

	init_options(&opt);

	/* select the output format: GCG only */
	(*opt.alnout_opt).output_clustal=FALSE;
	(*opt.alnout_opt).output_gcg=TRUE;

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		fprintf(stderr,"ERROR: No sequences in %s\n",infile);
		exit(1);
	}

/* write out the sequences */
	/* NOTE(review): assumes gcg_outname holds at least FILENAMELEN+1 chars - confirm */
	strcpy((*opt.alnout_opt).gcg_outname, outfile);

	/* keep the sequences in their input order on output */
	for (i=0;i<mult_aln.nseqs;i++) mult_aln.seqs[i].output_index = i;

	if(!open_alignment_output(infile,opt.alnout_opt)) exit(1);
	create_alignment_output(mult_aln,*opt.alnout_opt);

	return 0;
}
Exemple #2
0
/*
 * Interactive menu for choosing the alignment output format(s).
 *
 * Each pass of the loop prints the current state of the output flags, reads
 * one line with getstr() into lin2 and acts on its first character
 * (case-insensitive):
 *   F, 1-6  toggle an output format flag
 *   7       toggle GDE output case
 *   8       toggle CLUSTALW sequence numbers
 *   9       switch output order between INPUT and ALIGNED
 *   T, R    toggle parameter output / sequence range numbers
 *   0       write the alignment file(s) now
 *   H, ?    show help
 * An empty line returns to the caller.  All flags, lin2, jmpbuf, seqname,
 * nseqs and empty are defined outside this function.
 */
static void format_options_menu(void)      /* format of alignment output */
{
	char path[FILENAMELEN+1];
	int catchint;

	/* Only install our own SIGINT handling if SIGINT was not already
	   being ignored when we got here. */
	catchint = signal(SIGINT, SIG_IGN) != SIG_IGN;
	if (catchint) {
		/* a non-zero return from setjmp means we arrived via longjmp on jmpbuf */
		if (setjmp(jmpbuf) != 0)
			fprintf(stdout,"\n.. Interrupt\n");
#ifdef UNIX
		if (signal(SIGINT,jumper) == BADSIG)
			fprintf(stdout,"Error: signal\n");
#else
		if (signal(SIGINT,SIG_DFL) == (void*)BADSIG)
			fprintf(stdout,"Error: signal\n");
#endif
	}

	while(TRUE) {
		/* ---- show the menu with the current settings ---- */
		fprintf(stdout,"\n\n\n");
		fprintf(stdout," ********* Format of Alignment Output *********\n");
		fprintf(stdout,"\n\n");
		fprintf(stdout,"     F. Toggle FASTA format output       =  %s\n\n",
					(!output_fasta) ? "OFF" : "ON");
		fprintf(stdout,"     1. Toggle CLUSTAL format output     =  %s\n",
					(!output_clustal) ? "OFF" : "ON");
		fprintf(stdout,"     2. Toggle NBRF/PIR format output    =  %s\n",
					(!output_nbrf) ? "OFF" : "ON");
		fprintf(stdout,"     3. Toggle GCG/MSF format output     =  %s\n",
					(!output_gcg) ? "OFF" : "ON");
		fprintf(stdout,"     4. Toggle PHYLIP format output      =  %s\n",
					(!output_phylip) ? "OFF" : "ON");
		fprintf(stdout,"     5. Toggle NEXUS format output       =  %s\n",
					(!output_nexus) ? "OFF" : "ON");
		fprintf(stdout,"     6. Toggle GDE format output         =  %s\n\n",
					(!output_gde) ? "OFF" : "ON");
		fprintf(stdout,"     7. Toggle GDE output case           =  %s\n",
					(!lowercase) ? "UPPER" : "LOWER");

		fprintf(stdout,"     8. Toggle CLUSTALW sequence numbers =  %s\n",
					(!cl_seq_numbers) ? "OFF" : "ON");
		/* test against INPUT so the display agrees with the toggle in case '9' */
		fprintf(stdout,"     9. Toggle output order              =  %s\n\n",
					(output_order==INPUT) ? "INPUT FILE" : "ALIGNED");

		fprintf(stdout,"     0. Create alignment output file(s) now?\n\n");
		fprintf(stdout,"     T. Toggle parameter output          = %s\n",
					(!save_parameters) ? "OFF" : "ON");
		fprintf(stdout,"     R. Toggle sequence range numbers =  %s\n",
					(!seqRange) ? "OFF" : "ON");
		fprintf(stdout,"\n");
		fprintf(stdout,"     H. HELP\n\n\n");

		/* ---- read and dispatch the user's choice ---- */
		getstr("Enter number (or [RETURN] to exit)",lin2);
		if(*lin2 == EOS) return;

		/* cast: passing a negative char to toupper() is undefined behaviour */
		switch(toupper((unsigned char)*lin2)) {
			case '1':
				output_clustal ^= TRUE;
				break;
			case '2':
				output_nbrf ^= TRUE;
				break;
			case '3':
				output_gcg ^= TRUE;
				break;
			case '4':
				output_phylip ^= TRUE;
				break;
			case '5':
				output_nexus ^= TRUE;
				break;
			case '6':
				output_gde ^= TRUE;
				break;
			case '7':
				lowercase ^= TRUE;
				break;
			case '8':
				cl_seq_numbers ^= TRUE;
				break;
			case '9':
				if (output_order == INPUT) output_order = ALIGNED;
				else output_order = INPUT;
				break;
			case 'F':
				output_fasta ^= TRUE;
				break;
			case 'R':
				seqRange ^= TRUE;
				break;

			case '0':		/* DES */
				/* nothing to write until sequences have been loaded */
				if(empty) {
					error("No sequences loaded");
					break;
				}
				get_path(seqname,path);
				if(!open_alignment_output(path)) break;
				create_alignment_output(1,nseqs);
				break;
			case 'T':
				save_parameters ^= TRUE;
				break;
			case '?':
			case 'H':
				get_help('5');
				break;
			default:
				fprintf(stdout,"\n\nUnrecognised Command\n\n");
				break;
		}
	}
}
Exemple #3
0
int main(int argc, char **argv)
{
	FILE *ofd,*ifd;
        ALN mult_aln;
        OPT opt;
	char infile[FILENAMELEN+1];
	char outfile[FILENAMELEN+1];
	int nseqs;
	int  i,j,l,n,ires;
	int err,ix,ntot;
	float min_nn,nn;
	float tmp;
	Boolean eof,found;

	if(argc!=3 && argc!=7 && argc!=8) {
		usage(argv[0]);
		return 0;
	}

	strcpy(infile,argv[1]);
	strcpy(outfile,argv[2]);

/* open the matrix file */
	verbose=FALSE;

	if(argc==3) {
		get_default_matrix();
		go=0.0;
		ge=0.1;
		egap=0.0;
	}
	else {
		if(argc==8) verbose=TRUE;
	

        	if((ifd=fopen(argv[3],"r"))==NULL) {
            	fprintf(stderr,"Cannot open matrix file [%s]",argv[3]);
            	return 0;
        	}
		err=readmatrix(ifd);
		if(err<=0) {
			fprintf(stderr,"Error: bad matrix in %s\n",argv[3]);
			return 0;
		}

		go=atof(argv[4]);
		ge=atof(argv[5]);
		egap=atof(argv[6]);
	}

        init_options(&opt);

        (*opt.alnout_opt).output_clustal=FALSE;
        (*opt.alnout_opt).output_relacs=TRUE;

/* read in the sequences */
        seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
        if(mult_aln.nseqs<=0) {
                error("No sequences in %s\n",infile);
                exit(1);
        }
        nseqs=mult_aln.nseqs;

/* remove the gaps */
	seqlength=0;
	useqlen_array=(int *)ckalloc((nseqs+1)*sizeof(int));
	for(i=0;i<nseqs;i++) {
		if(mult_aln.seqs[i].len>seqlength) seqlength=mult_aln.seqs[i].len;
		l=0;
		for(j=0;j<mult_aln.seqs[i].len;j++)
			if(isalpha(mult_aln.seqs[i].data[j])) {
				l++;
			}
		useqlen_array[i]=l;
	}
        maxlen=0;
        for(i=0;i<nseqs;i++)
                if(useqlen_array[i]>maxlen) maxlen=useqlen_array[i];
        minlen=10000;
        for(i=0;i<nseqs;i++)
                if(useqlen_array[i]<minlen) minlen=useqlen_array[i];
	
/* remove any column score data that already exists in the input file */
	/*if (mult_aln.ncol_scores==1)
		ckfree(mult_aln.col_score[0].data);*/
	ix=mult_aln.ncol_scores;
	mult_aln.col_score[ix].data=(sint *)ckalloc((seqlength+1)*sizeof(sint));
	mult_aln.ncol_scores=ix+1;

/* calculate some simple statistics */
        pcid=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                pcid[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0;i<nseqs;i++) {
		for(j=i+1;j<nseqs;j++) {
			pcid[j][i]=pcid[i][j]=pcidentity(mult_aln,i,j);
		}
	}

/* find the nearest neighbor for each sequence */
	min_nn=1.0;
	for(i=0;i<nseqs;i++) {
		nn=0.0;
		for(j=0;j<nseqs;j++) {
			if(i!=j && pcid[i][j]>nn) nn=pcid[i][j];
		}
		if(nn<min_nn) min_nn=nn;
	}

        seqweight=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                seqweight[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0;i<nseqs;i++) 
		for(j=i;j<nseqs;j++) {
			seqweight[j][i]=seqweight[i][j]=1.0-pcid[i][j];
		}


	fragment=(Boolean *)ckalloc((nseqs+1)*sizeof(Boolean));


/* calculate pairwise alignment scores using k-tuple scores */
        qpw_id=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                qpw_id[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        for(i=0,n=0;i<nseqs;i++)
                for(j=i+1;j<nseqs;j++) {
			qpw_id[i][j]=100.0*pcid[i][j];
                        if(qpw_id[i][j]<60) {
				qpw_id[i][j]=show_pair(mult_aln,i,j);
			}
			if(qpw_id[i][j]>40) {
				tmp=(float)useqlen_array[i]/(float)useqlen_array[j];
				if(tmp<0.8) fragment[i]=TRUE;
				else if(tmp>1.25) fragment[j]=TRUE;
			}
			n++;
		}

	/*if(verbose)
        for(i=0;i<nseqs;i++)
		if(fragment[i]) fprintf(stdout,"%s fragment %s\n",argv[1],names[i]);*/

/* calculate sequence groups and keep first sequence in each group for processing */
        use_seq=(int *)ckalloc((nseqs+1)*sizeof(int));
	for(i=0;i<nseqs;i++)
		use_seq[i]=2;

	query=0;
	seqgroup=(int *)ckalloc((nseqs+1)*sizeof(int));
	groupseed=(int *)ckalloc((nseqs+1)*sizeof(int));
        calc_groups(query,0.7,nseqs,pcid,seqgroup,groupseed);

	if(ngroups<=0) {
		fprintf(stderr,"Error: problem with sequence grouping\n");
		exit(1);
	}
        for(j=0;j<nseqs;j++) use_seq[j]=(-1);
        for(i=0;i<ngroups;i++) {
		j=groupseed[i];
		use_seq[j]=2;
        }


        for(i=0;i<nseqs;i++)
                ckfree(pcid[i]);
        ckfree(pcid);
	ckfree(groupseed);
	ckfree(seqgroup);


	qpw=(float *)ckalloc((ngroups*ngroups+1)*sizeof(float));

        for(i=0,n=0;i<nseqs;i++)
		if(use_seq[i]>1) 
                for(j=i+1;j<nseqs;j++)
			if(use_seq[j]>1)
                        qpw[n++]=qpw_id[i][j];
        for(i=0;i<nseqs;i++)
                ckfree(qpw_id[i]);
        ckfree(qpw_id);

/* sort the pairwise k-tuple scores into ascending order */
        sort_scores(qpw,0,n-1);

/* calculate the scores for the gaps */
        gop=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                gop[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        gep=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                gep[i]=(float *)ckalloc((nseqs+1)*sizeof(float));
        egp=(float **)ckalloc((nseqs+1)*sizeof(float *));
        for(i=0;i<nseqs;i++)
                egp[i]=(float *)ckalloc((nseqs+1)*sizeof(float));

       	for(i=0;i<nseqs;i++) 
		if(use_seq[i]>1)
              	for(j=i+1;j<nseqs;j++)
			if(use_seq[j]>1)
			score_gaps(mult_aln,i,j);

	calc_md(&mult_aln,ix);
	for(i=0,ntot=0;i<nseqs;i++)
		if(use_seq[i]>1) {
			ntot++;
		}
	tmp=set_mdcutoff(ntot,q1);
	normd_rs/=tmp;
	tmp=1.0;
	norm_md/=tmp;

	mult_aln.alnscore=norm_md;
	mult_aln.validalnscore=TRUE;

	if(!verbose) {
        	fprintf(stdout,"%s\t%.3f\n",argv[1],norm_md);
        	/*fprintf(stdout,"%.3f\n",norm_md);*/
	} else {
		/*fprintf(stdout,"%s %.3f %.3f %.3f %.3f %.3f %.3f %d %d %d\n",
		argv[1],norm_md,normd_rs,col,max_colscore,gap_extscore*0.1,q1,nseqs,minlen,maxlen);*/
		fprintf(stdout,"FILE  %s\n",argv[1]);
        	fprintf(stdout,"norMD    %.3f\n",norm_md);
        	/*fprintf(stdout,"norMD_of %.3f\n",norm_md);
        	fprintf(stdout,"norMD_rs %.3f\n",normd_rs);*/
        	fprintf(stdout,"NSEQS    %d\n",nseqs);
        	fprintf(stdout,"MD       %.3f\n",col);
        	fprintf(stdout,"maxMD    %.3f\n",max_colscore);
        	fprintf(stdout,"GOP      %.3f\n",gap_openscore);
        	fprintf(stdout,"GEP      %.3f\n",gap_extscore);
        	fprintf(stdout,"LQR      %.3f\n",q1);
	}

        for(i=0;i<nseqs;i++)
                ckfree(gop[i]);
        ckfree(gop);
        for(i=0;i<nseqs;i++)
                ckfree(gep[i]);
        ckfree(gep);
        for(i=0;i<nseqs;i++)
                ckfree(egp[i]);
        ckfree(egp);

        for(i=0;i<nseqs;i++)
                ckfree(seqweight[i]);
        ckfree(seqweight);
	ckfree(qpw);
	ckfree(use_seq);
	ckfree(fragment);
	ckfree(useqlen_array);

/* write out the sequences */
	strcpy(opt.alnout_opt->relacs_outname,outfile);
        if(!open_alignment_output(outfile,opt.alnout_opt)) exit(1);
        create_alignment_output(mult_aln,*opt.alnout_opt);


	return 0;
}