int main(int argc,char **argv) { int i,nseqs; char infile[FILENAMELEN+1]; char outfile[FILENAMELEN+1]; ALN mult_aln; OPT opt; if(argc!=3) { fprintf(stderr,"Usage: %s input_aln output_aln\n",argv[0]); exit(1); } strcpy(infile,argv[1]); strcpy(outfile,argv[2]); init_options(&opt); (*opt.alnout_opt).output_clustal=FALSE; (*opt.alnout_opt).output_gcg=TRUE; /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { fprintf(stderr,"ERROR: No sequences in %s\n",infile); exit(1); } nseqs=mult_aln.nseqs; /* write out the sequences */ strcpy((*opt.alnout_opt).gcg_outname, outfile); for (i=0;i<mult_aln.nseqs;i++) mult_aln.seqs[i].output_index = i; if(!open_alignment_output(infile,opt.alnout_opt)) exit(1); create_alignment_output(mult_aln,*opt.alnout_opt); }
int main(int argc,char **argv) { sint i,j,k,n,s; sint status; sint result_type; char c; char infile[FILENAMELEN+1]; char treefile[FILENAMELEN+1]; float meanid; FILE *tree; sint maxres,*gapptr=NULL; IN_TREEPTR itree; double dscore; double meanscore; double **tmat; sint window; sint block_cutoff; OPT opt; if(argc!=2) { fprintf(stdout,"Usage: %s input_aln\n",argv[0]); exit(1); } strcpy(infile,argv[1]); init_options(&opt); /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { error("No sequences in %s\n",infile); exit(1); } window=8; /* count pairwise residue percent identities */ tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) ); for(i=0;i<mult_aln.nseqs;i++) tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) ); meanscore=0; for (i=0,n=0;i<mult_aln.nseqs;i++) { for (j=i+1;j<mult_aln.nseqs;j++) { dscore = countid(mult_aln.seqs[i],mult_aln.seqs[j]); tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0; n++; meanscore+=dscore; } } meanscore/=(float)n; /*if(mult_aln.nseqs<100) block_cutoff=8; else if(mult_aln.nseqs<250) block_cutoff=6; else*/ block_cutoff=5; if(meanscore>50) block_cutoff=50; /* make a tree from the percent identities (used for sequence weighting) */ strcpy(treefile,infile); strcat(treefile,".ph"); if((tree = open_explicit_file(treefile))==NULL) exit(1); guide_tree(tree,mult_aln.seqs,mult_aln.nseqs, tmat, QUICKNJ); itree=(IN_TREEPTR)ckalloc(sizeof(IN_TREE)); status = read_tree(treefile, mult_aln.seqs, 0, mult_aln.nseqs,itree); for(i=0;i<mult_aln.nseqs;i++) ckfree(tmat[i]); ckfree(tmat); if (status < 0) exit(1); seq_weight = calc_seq_weights(0,mult_aln.nseqs,itree,FALSE); free_tree(itree); remove(treefile); /* find the start and end positions of each sequence */ is = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint)); ie = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint)); for(s=0;s<mult_aln.nseqs;s++) { is[s]=0; ie[s] = mult_aln.seqs[s].len; for (i=0; i<mult_aln.seqs[s].len; i++) { c = mult_aln.seqs[s].data[i]; if (!isalpha(c)) is[s]++; else break; } for (i=mult_aln.seqs[s].len-1; i>=0; i--) { c = mult_aln.seqs[s].data[i]; if (!isalpha(c)) ie[s]--; else break; } } matrix.format=0; maxres = get_cl_matrix(FALSE, gon250mt, gapptr, TRUE, 100, &matrix); all_blocks(infile,window,block_cutoff); }
void main_menu(void) { int catchint; catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; if (catchint) { if (setjmp(jmpbuf) != 0) fprintf(stdout,"\n.. Interrupt\n"); #ifdef UNIX if (signal(SIGINT,jumper) == BADSIG) fprintf(stdout,"Error: signal\n"); #else if (signal(SIGINT,SIG_DFL) == (void*)BADSIG) fprintf(stdout,"Error: signal\n"); #endif } while(TRUE) { fprintf(stdout,"\n\n\n"); fprintf(stdout," **************************************************************\n"); fprintf(stdout," ******** CLUSTAL %s Multiple Sequence Alignments ********\n",revision_level); fprintf(stdout," **************************************************************\n"); fprintf(stdout,"\n\n"); fprintf(stdout," 1. Sequence Input From Disc\n"); fprintf(stdout," 2. Multiple Alignments\n"); fprintf(stdout," 3. Profile / Structure Alignments\n"); fprintf(stdout," 4. Phylogenetic trees\n"); fprintf(stdout,"\n"); fprintf(stdout," S. Execute a system command\n"); fprintf(stdout," H. HELP\n"); fprintf(stdout," X. EXIT (leave program)\n\n\n"); getstr("Your choice",lin1); switch(toupper(*lin1)) { case '1': seq_input(FALSE); phylip_name[0]=EOS; clustal_name[0]=EOS; dist_name[0]=EOS; nexus_name[0]=EOS; break; case '2': multiple_align_menu(); break; case '3': profile_align_menu(); break; case '4': phylogenetic_tree_menu(); break; case 'S': do_system(); break; case '?': case 'H': get_help('1'); break; case 'Q': case 'X': exit(0); break; default: fprintf(stdout,"\n\nUnrecognised Command\n\n"); break; } } }
static void phylogenetic_tree_menu(void) { int catchint; catchint = signal(SIGINT, SIG_IGN) != SIG_IGN; if (catchint) { if (setjmp(jmpbuf) != 0) fprintf(stdout,"\n.. Interrupt\n"); #ifdef UNIX if (signal(SIGINT,jumper) == BADSIG) fprintf(stdout,"Error: signal\n"); #else if (signal(SIGINT,SIG_DFL) == (void*)BADSIG) fprintf(stdout,"Error: signal\n"); #endif } while(TRUE) { fprintf(stdout,"\n\n\n"); fprintf(stdout,"****** PHYLOGENETIC TREE MENU ******\n"); fprintf(stdout,"\n\n"); fprintf(stdout," 1. Input an alignment\n"); fprintf(stdout," 2. Exclude positions with gaps? "); if(tossgaps) fprintf(stdout,"= ON\n"); else fprintf(stdout,"= OFF\n"); fprintf(stdout," 3. Correct for multiple substitutions? "); if(kimura) fprintf(stdout,"= ON\n"); else fprintf(stdout,"= OFF\n"); fprintf(stdout," 4. Draw tree now\n"); fprintf(stdout," 5. Bootstrap tree\n"); fprintf(stdout," 6. Output format options\n"); fprintf(stdout,"\n"); fprintf(stdout," S. Execute a system command\n"); fprintf(stdout," H. HELP\n"); fprintf(stdout," or press [RETURN] to go back to main menu\n\n\n"); getstr("Your choice",lin1); if(*lin1 == EOS) return; switch(toupper(*lin1)) { case '1': seq_input(FALSE); phylip_name[0]=EOS; clustal_name[0]=EOS; dist_name[0]=EOS; nexus_name[0]=EOS; break; case '2': tossgaps ^= TRUE; break; case '3': kimura ^= TRUE;; break; case '4': phylogenetic_tree(phylip_name,clustal_name,dist_name,nexus_name,"amenu.pim"); break; case '5': bootstrap_tree(phylip_name,clustal_name,nexus_name); break; case '6': tree_format_options_menu(); break; case 'S': do_system(); break; case '?': case 'H': get_help('7'); break; case 'Q': case 'X': return; default: fprintf(stdout,"\n\nUnrecognised Command\n\n"); break; } } }
int main(int argc, char **argv) { FILE *ofd,*ifd; ALN mult_aln; OPT opt; char infile[FILENAMELEN+1]; char outfile[FILENAMELEN+1]; int nseqs; int i,j,l,n,ires; int err,ix,ntot; float min_nn,nn; float tmp; Boolean eof,found; if(argc!=3 && argc!=7 && argc!=8) { usage(argv[0]); return 0; } strcpy(infile,argv[1]); strcpy(outfile,argv[2]); /* open the matrix file */ verbose=FALSE; if(argc==3) { get_default_matrix(); go=0.0; ge=0.1; egap=0.0; } else { if(argc==8) verbose=TRUE; if((ifd=fopen(argv[3],"r"))==NULL) { fprintf(stderr,"Cannot open matrix file [%s]",argv[3]); return 0; } err=readmatrix(ifd); if(err<=0) { fprintf(stderr,"Error: bad matrix in %s\n",argv[3]); return 0; } go=atof(argv[4]); ge=atof(argv[5]); egap=atof(argv[6]); } init_options(&opt); (*opt.alnout_opt).output_clustal=FALSE; (*opt.alnout_opt).output_relacs=TRUE; /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { error("No sequences in %s\n",infile); exit(1); } nseqs=mult_aln.nseqs; /* remove the gaps */ seqlength=0; useqlen_array=(int *)ckalloc((nseqs+1)*sizeof(int)); for(i=0;i<nseqs;i++) { if(mult_aln.seqs[i].len>seqlength) seqlength=mult_aln.seqs[i].len; l=0; for(j=0;j<mult_aln.seqs[i].len;j++) if(isalpha(mult_aln.seqs[i].data[j])) { l++; } useqlen_array[i]=l; } maxlen=0; for(i=0;i<nseqs;i++) if(useqlen_array[i]>maxlen) maxlen=useqlen_array[i]; minlen=10000; for(i=0;i<nseqs;i++) if(useqlen_array[i]<minlen) minlen=useqlen_array[i]; /* remove any column score data that already exists in the input file */ /*if (mult_aln.ncol_scores==1) ckfree(mult_aln.col_score[0].data);*/ ix=mult_aln.ncol_scores; mult_aln.col_score[ix].data=(sint *)ckalloc((seqlength+1)*sizeof(sint)); mult_aln.ncol_scores=ix+1; /* calculate some simple statistics */ pcid=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) pcid[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) { for(j=i+1;j<nseqs;j++) { pcid[j][i]=pcid[i][j]=pcidentity(mult_aln,i,j); } } /* find the nearest neighbor for each sequence */ min_nn=1.0; for(i=0;i<nseqs;i++) { nn=0.0; for(j=0;j<nseqs;j++) { if(i!=j && pcid[i][j]>nn) nn=pcid[i][j]; } if(nn<min_nn) min_nn=nn; } seqweight=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) seqweight[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) for(j=i;j<nseqs;j++) { seqweight[j][i]=seqweight[i][j]=1.0-pcid[i][j]; } fragment=(Boolean *)ckalloc((nseqs+1)*sizeof(Boolean)); /* calculate pairwise alignment scores using k-tuple scores */ qpw_id=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) qpw_id[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0,n=0;i<nseqs;i++) for(j=i+1;j<nseqs;j++) { qpw_id[i][j]=100.0*pcid[i][j]; if(qpw_id[i][j]<60) { qpw_id[i][j]=show_pair(mult_aln,i,j); } if(qpw_id[i][j]>40) { tmp=(float)useqlen_array[i]/(float)useqlen_array[j]; if(tmp<0.8) fragment[i]=TRUE; else if(tmp>1.25) fragment[j]=TRUE; } n++; } /*if(verbose) for(i=0;i<nseqs;i++) if(fragment[i]) fprintf(stdout,"%s fragment %s\n",argv[1],names[i]);*/ /* calculate sequence groups and keep first sequence in each group for processing */ use_seq=(int *)ckalloc((nseqs+1)*sizeof(int)); for(i=0;i<nseqs;i++) use_seq[i]=2; query=0; seqgroup=(int *)ckalloc((nseqs+1)*sizeof(int)); groupseed=(int *)ckalloc((nseqs+1)*sizeof(int)); calc_groups(query,0.7,nseqs,pcid,seqgroup,groupseed); if(ngroups<=0) { fprintf(stderr,"Error: problem with sequence grouping\n"); exit(1); } for(j=0;j<nseqs;j++) use_seq[j]=(-1); for(i=0;i<ngroups;i++) { j=groupseed[i]; use_seq[j]=2; } for(i=0;i<nseqs;i++) ckfree(pcid[i]); ckfree(pcid); ckfree(groupseed); ckfree(seqgroup); qpw=(float *)ckalloc((ngroups*ngroups+1)*sizeof(float)); for(i=0,n=0;i<nseqs;i++) if(use_seq[i]>1) for(j=i+1;j<nseqs;j++) if(use_seq[j]>1) qpw[n++]=qpw_id[i][j]; for(i=0;i<nseqs;i++) ckfree(qpw_id[i]); ckfree(qpw_id); /* sort the pairwise k-tuple scores into ascending order */ sort_scores(qpw,0,n-1); /* calculate the scores for the gaps */ gop=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) gop[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); gep=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) gep[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); egp=(float **)ckalloc((nseqs+1)*sizeof(float *)); for(i=0;i<nseqs;i++) egp[i]=(float *)ckalloc((nseqs+1)*sizeof(float)); for(i=0;i<nseqs;i++) if(use_seq[i]>1) for(j=i+1;j<nseqs;j++) if(use_seq[j]>1) score_gaps(mult_aln,i,j); calc_md(&mult_aln,ix); for(i=0,ntot=0;i<nseqs;i++) if(use_seq[i]>1) { ntot++; } tmp=set_mdcutoff(ntot,q1); normd_rs/=tmp; tmp=1.0; norm_md/=tmp; mult_aln.alnscore=norm_md; mult_aln.validalnscore=TRUE; if(!verbose) { fprintf(stdout,"%s\t%.3f\n",argv[1],norm_md); /*fprintf(stdout,"%.3f\n",norm_md);*/ } else { /*fprintf(stdout,"%s %.3f %.3f %.3f %.3f %.3f %.3f %d %d %d\n", argv[1],norm_md,normd_rs,col,max_colscore,gap_extscore*0.1,q1,nseqs,minlen,maxlen);*/ fprintf(stdout,"FILE %s\n",argv[1]); fprintf(stdout,"norMD %.3f\n",norm_md); /*fprintf(stdout,"norMD_of %.3f\n",norm_md); fprintf(stdout,"norMD_rs %.3f\n",normd_rs);*/ fprintf(stdout,"NSEQS %d\n",nseqs); fprintf(stdout,"MD %.3f\n",col); fprintf(stdout,"maxMD %.3f\n",max_colscore); fprintf(stdout,"GOP %.3f\n",gap_openscore); fprintf(stdout,"GEP %.3f\n",gap_extscore); fprintf(stdout,"LQR %.3f\n",q1); } for(i=0;i<nseqs;i++) ckfree(gop[i]); ckfree(gop); for(i=0;i<nseqs;i++) ckfree(gep[i]); ckfree(gep); for(i=0;i<nseqs;i++) ckfree(egp[i]); ckfree(egp); for(i=0;i<nseqs;i++) ckfree(seqweight[i]); ckfree(seqweight); ckfree(qpw); ckfree(use_seq); ckfree(fragment); ckfree(useqlen_array); /* write out the sequences */ strcpy(opt.alnout_opt->relacs_outname,outfile); if(!open_alignment_output(outfile,opt.alnout_opt)) exit(1); create_alignment_output(mult_aln,*opt.alnout_opt); return 0; }