int main(int argc,char **argv) { int i,nseqs; char infile[FILENAMELEN+1]; char outfile[FILENAMELEN+1]; ALN mult_aln; OPT opt; if(argc!=3) { fprintf(stderr,"Usage: %s input_aln output_aln\n",argv[0]); exit(1); } strcpy(infile,argv[1]); strcpy(outfile,argv[2]); init_options(&opt); (*opt.alnout_opt).output_clustal=FALSE; (*opt.alnout_opt).output_gcg=TRUE; /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { fprintf(stderr,"ERROR: No sequences in %s\n",infile); exit(1); } nseqs=mult_aln.nseqs; /* write out the sequences */ strcpy((*opt.alnout_opt).gcg_outname, outfile); for (i=0;i<mult_aln.nseqs;i++) mult_aln.seqs[i].output_index = i; if(!open_alignment_output(infile,opt.alnout_opt)) exit(1); create_alignment_output(mult_aln,*opt.alnout_opt); }
static void format_options_menu(void) /* format of alignment output */ { sint i; sint length = 0; char path[FILENAMELEN+1]; int catchint; catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; if (catchint) { if (setjmp(jmpbuf) != 0) fprintf(stdout,"\n.. Interrupt\n"); #ifdef UNIX if (signal(SIGINT,jumper) == BADSIG) fprintf(stdout,"Error: signal\n"); #else if (signal(SIGINT,SIG_DFL) == (void*)BADSIG) fprintf(stdout,"Error: signal\n"); #endif } while(TRUE) { fprintf(stdout,"\n\n\n"); fprintf(stdout," ********* Format of Alignment Output *********\n"); fprintf(stdout,"\n\n"); fprintf(stdout," F. Toggle FASTA format output = %s\n\n", (!output_fasta) ? "OFF" : "ON"); fprintf(stdout," 1. Toggle CLUSTAL format output = %s\n", (!output_clustal) ? "OFF" : "ON"); fprintf(stdout," 2. Toggle NBRF/PIR format output = %s\n", (!output_nbrf) ? "OFF" : "ON"); fprintf(stdout," 3. Toggle GCG/MSF format output = %s\n", (!output_gcg) ? "OFF" : "ON"); fprintf(stdout," 4. Toggle PHYLIP format output = %s\n", (!output_phylip) ? "OFF" : "ON"); fprintf(stdout," 5. Toggle NEXUS format output = %s\n", (!output_nexus) ? "OFF" : "ON"); fprintf(stdout," 6. Toggle GDE format output = %s\n\n", (!output_gde) ? "OFF" : "ON"); fprintf(stdout," 7. Toggle GDE output case = %s\n", (!lowercase) ? "UPPER" : "LOWER"); fprintf(stdout," 8. Toggle CLUSTALW sequence numbers = %s\n", (!cl_seq_numbers) ? "OFF" : "ON"); fprintf(stdout," 9. Toggle output order = %s\n\n", (output_order==0) ? "INPUT FILE" : "ALIGNED"); fprintf(stdout," 0. Create alignment output file(s) now?\n\n"); fprintf(stdout," T. Toggle parameter output = %s\n", (!save_parameters) ? "OFF" : "ON"); fprintf(stdout," R. Toggle sequence range numbers = %s\n", (!seqRange) ? "OFF" : "ON"); fprintf(stdout,"\n"); fprintf(stdout," H. HELP\n\n\n"); getstr("Enter number (or [RETURN] to exit)",lin2); if(*lin2 == EOS) return; switch(toupper(*lin2)) { case '1': output_clustal ^= TRUE; break; case '2': output_nbrf ^= TRUE; break; case '3': output_gcg ^= TRUE; break; case '4': output_phylip ^= TRUE; break; case '5': output_nexus ^= TRUE; break; case '6': output_gde ^= TRUE; break; case '7': lowercase ^= TRUE; break; case '8': cl_seq_numbers ^= TRUE; break; case '9': if (output_order == INPUT) output_order = ALIGNED; else output_order = INPUT; break; case 'F': output_fasta ^= TRUE; break; case 'R': seqRange ^= TRUE; break; case '0': /* DES */ if(empty) { error("No sequences loaded"); break; } get_path(seqname,path); if(!open_alignment_output(path)) break; create_alignment_output(1,nseqs); break; case 'T': save_parameters ^= TRUE; break; case '?': case 'H': get_help('5'); break; default: fprintf(stdout,"\n\nUnrecognised Command\n\n"); break; } } }
int main(int argc, char **argv) { FILE *ofd,*ifd; ALN mult_aln; OPT opt; char infile[FILENAMELEN+1]; char outfile[FILENAMELEN+1]; int nseqs; int i,j,l,n,ires; int err,ix,ntot; float min_nn,nn; float tmp; Boolean eof,found; if(argc!=3 && argc!=7 && argc!=8) { usage(argv[0]); return 0; } strcpy(infile,argv[1]); strcpy(outfile,argv[2]); /* open the matrix file */ verbose=FALSE; if(argc==3) { get_default_matrix(); go=0.0; ge=0.1; egap=0.0; } else { if(argc==8) verbose=TRUE; if((ifd=fopen(argv[3],"r"))==NULL) { fprintf(stderr,"Cannot open matrix file [%s]",argv[3]); return 0; } err=readmatrix(ifd); if(err<=0) { fprintf(stderr,"Error: bad matrix in %s\n",argv[3]); return 0; } go=atof(argv[4]); ge=atof(argv[5]); egap=atof(argv[6]); } init_options(&opt); (*opt.alnout_opt).output_clustal=FALSE; (*opt.alnout_opt).output_relacs=TRUE; /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { error("No sequences in %s\n",infile); exit(1); } nseqs=mult_aln.nseqs; /* remove the gaps */ seqlength=0; useqlen_array=(int *)ckalloc((nseqs+1)*sizeof(int)); for(i=0;i<nseqs;i++) { if(mult_aln.seqs[i].len>seqlength) seqlength=mult_aln.seqs[i].len; l=0; for(j=0;j<mult_aln.seqs[i].len;j++) if(isalpha(mult_aln.seqs[i].data[j])) { l++; } useqlen_array[i]=l; } maxlen=0; for(i=0;i<nseqs;i++) if(useqlen_array[i]>maxlen) maxlen=useqlen_array[i]; minlen=10000; for(i=0;i<nseqs;i++) if(useqlen_array[i]<minlen) minlen=useqlen_array[i]; /* remove any column score data that already exists in the input file */ /*if (mult_aln.ncol_scores==1) ckfree(mult_aln.col_score[0].data);*/ ix=mult_aln.ncol_scores; mult_aln.col_score[ix].data=(sint *)ckalloc((seqlength+1)*sizeof(sint)); mult_aln.ncol_scores=ix+1; /* calculate some simple statistics */ pcid=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) pcid[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) { for(j=i+1;j<nseqs;j++) { pcid[j][i]=pcid[i][j]=pcidentity(mult_aln,i,j); } } /* find the nearest neighbor for each sequence */ min_nn=1.0; for(i=0;i<nseqs;i++) { nn=0.0; for(j=0;j<nseqs;j++) { if(i!=j && pcid[i][j]>nn) nn=pcid[i][j]; } if(nn<min_nn) min_nn=nn; } seqweight=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) seqweight[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) for(j=i;j<nseqs;j++) { seqweight[j][i]=seqweight[i][j]=1.0-pcid[i][j]; } fragment=(Boolean *)ckalloc((nseqs+1)*sizeof(Boolean)); /* calculate pairwise alignment scores using k-tuple scores */ qpw_id=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) qpw_id[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0,n=0;i<nseqs;i++) for(j=i+1;j<nseqs;j++) { qpw_id[i][j]=100.0*pcid[i][j]; if(qpw_id[i][j]<60) { qpw_id[i][j]=show_pair(mult_aln,i,j); } if(qpw_id[i][j]>40) { tmp=(float)useqlen_array[i]/(float)useqlen_array[j]; if(tmp<0.8) fragment[i]=TRUE; else if(tmp>1.25) fragment[j]=TRUE; } n++; } /*if(verbose) for(i=0;i<nseqs;i++) if(fragment[i]) fprintf(stdout,"%s fragment %s\n",argv[1],names[i]);*/ /* calculate sequence groups and keep first sequence in each group for processing */ use_seq=(int *)ckalloc((nseqs+1)*sizeof(int)); for(i=0;i<nseqs;i++) use_seq[i]=2; query=0; seqgroup=(int *)ckalloc((nseqs+1)*sizeof(int)); groupseed=(int *)ckalloc((nseqs+1)*sizeof(int)); calc_groups(query,0.7,nseqs,pcid,seqgroup,groupseed); if(ngroups<=0) { fprintf(stderr,"Error: problem with sequence grouping\n"); exit(1); } for(j=0;j<nseqs;j++) use_seq[j]=(-1); for(i=0;i<ngroups;i++) { j=groupseed[i]; use_seq[j]=2; } for(i=0;i<nseqs;i++) ckfree(pcid[i]); ckfree(pcid); ckfree(groupseed); ckfree(seqgroup); qpw=(float *)ckalloc((ngroups*ngroups+1)*sizeof(float)); for(i=0,n=0;i<nseqs;i++) if(use_seq[i]>1) for(j=i+1;j<nseqs;j++) if(use_seq[j]>1) qpw[n++]=qpw_id[i][j]; for(i=0;i<nseqs;i++) ckfree(qpw_id[i]); ckfree(qpw_id); /* sort the pairwise k-tuple scores into ascending order */ sort_scores(qpw,0,n-1); /* calculate the scores for the gaps */ gop=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) gop[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); gep=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) gep[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); egp=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) egp[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) if(use_seq[i]>1) for(j=i+1;j<nseqs;j++) if(use_seq[j]>1) score_gaps(mult_aln,i,j); calc_md(&mult_aln,ix); for(i=0,ntot=0;i<nseqs;i++) if(use_seq[i]>1) { ntot++; } tmp=set_mdcutoff(ntot,q1); normd_rs/=tmp; tmp=1.0; norm_md/=tmp; mult_aln.alnscore=norm_md; mult_aln.validalnscore=TRUE; if(!verbose) { fprintf(stdout,"%s\t%.3f\n",argv[1],norm_md); /*fprintf(stdout,"%.3f\n",norm_md);*/ } else { /*fprintf(stdout,"%s %.3f %.3f %.3f %.3f %.3f %.3f %d %d %d\n", argv[1],norm_md,normd_rs,col,max_colscore,gap_extscore*0.1,q1,nseqs,minlen,maxlen);*/ fprintf(stdout,"FILE %s\n",argv[1]); fprintf(stdout,"norMD %.3f\n",norm_md); /*fprintf(stdout,"norMD_of %.3f\n",norm_md); fprintf(stdout,"norMD_rs %.3f\n",normd_rs);*/ fprintf(stdout,"NSEQS %d\n",nseqs); fprintf(stdout,"MD %.3f\n",col); fprintf(stdout,"maxMD %.3f\n",max_colscore); fprintf(stdout,"GOP %.3f\n",gap_openscore); fprintf(stdout,"GEP %.3f\n",gap_extscore); fprintf(stdout,"LQR %.3f\n",q1); } for(i=0;i<nseqs;i++) ckfree(gop[i]); ckfree(gop); for(i=0;i<nseqs;i++) ckfree(gep[i]); ckfree(gep); for(i=0;i<nseqs;i++) ckfree(egp[i]); ckfree(egp); for(i=0;i<nseqs;i++) ckfree(seqweight[i]); ckfree(seqweight); ckfree(qpw); ckfree(use_seq); ckfree(fragment); ckfree(useqlen_array); /* write out the sequences */ strcpy(opt.alnout_opt->relacs_outname,outfile); if(!open_alignment_output(outfile,opt.alnout_opt)) exit(1); create_alignment_output(mult_aln,*opt.alnout_opt); return 0; }