sint palign1(void) /* a profile alignment */ { sint i,j,temp; sint entries; sint *aligned, *group; float dscore; lint score; info("Start of Initial Alignment"); /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to INT_SCALE_FACTOR */ temp = INT_SCALE_FACTOR/nseqs; for (i=0;i<nseqs;i++) seq_weight[i] = temp; distance_tree = FALSE; /* do the initial alignment......... */ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); for(i=1; i<=profile1_nseqs; ++i) group[i] = 1; for(i=profile1_nseqs+1; i<=nseqs; ++i) group[i] = 2; entries = nseqs; aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); for (i=1;i<=nseqs;i++) aligned[i] = 1; score = prfalign(group, aligned); info("Sequences:%d Score:%d",(pint)entries,(pint)score); group=ckfree((void *)group); aligned=ckfree((void *)aligned); for (i=1;i<=nseqs;i++) { for (j=i+1;j<=nseqs;j++) { dscore = countid(i,j); tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0; tmat[j][i] = tmat[i][j]; } } return(nseqs); }
int main(int argc,char **argv) { sint i,j,k,n,s; sint status; sint result_type; char c; char infile[FILENAMELEN+1]; char treefile[FILENAMELEN+1]; float meanid; FILE *tree; sint maxres,*gapptr=NULL; IN_TREEPTR itree; double dscore; double meanscore; double **tmat; sint window; sint block_cutoff; OPT opt; if(argc!=2) { fprintf(stdout,"Usage: %s input_aln\n",argv[0]); exit(1); } strcpy(infile,argv[1]); init_options(&opt); /* read in the sequences */ seq_input(infile,opt.explicit_type,FALSE,&mult_aln); if(mult_aln.nseqs<=0) { error("No sequences in %s\n",infile); exit(1); } window=8; /* count pairwise residue percent identities */ tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) ); for(i=0;i<mult_aln.nseqs;i++) tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) ); meanscore=0; for (i=0,n=0;i<mult_aln.nseqs;i++) { for (j=i+1;j<mult_aln.nseqs;j++) { dscore = countid(mult_aln.seqs[i],mult_aln.seqs[j]); tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0; n++; meanscore+=dscore; } } meanscore/=(float)n; /*if(mult_aln.nseqs<100) block_cutoff=8; else if(mult_aln.nseqs<250) block_cutoff=6; else*/ block_cutoff=5; if(meanscore>50) block_cutoff=50; /* make a tree from the percent identities (used for sequence weighting) */ strcpy(treefile,infile); strcat(treefile,".ph"); if((tree = open_explicit_file(treefile))==NULL) exit(1); guide_tree(tree,mult_aln.seqs,mult_aln.nseqs, tmat, QUICKNJ); itree=(IN_TREEPTR)ckalloc(sizeof(IN_TREE)); status = read_tree(treefile, mult_aln.seqs, 0, mult_aln.nseqs,itree); for(i=0;i<mult_aln.nseqs;i++) ckfree(tmat[i]); ckfree(tmat); if (status < 0) exit(1); seq_weight = calc_seq_weights(0,mult_aln.nseqs,itree,FALSE); free_tree(itree); remove(treefile); /* find the start and end positions of each sequence */ is = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint)); ie = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint)); for(s=0;s<mult_aln.nseqs;s++) { is[s]=0; ie[s] = mult_aln.seqs[s].len; for (i=0; i<mult_aln.seqs[s].len; i++) { c = mult_aln.seqs[s].data[i]; if (!isalpha(c)) is[s]++; else break; } for (i=mult_aln.seqs[s].len-1; i>=0; i--) { c = mult_aln.seqs[s].data[i]; if (!isalpha(c)) ie[s]--; else break; } } matrix.format=0; maxres = get_cl_matrix(FALSE, gon250mt, gapptr, TRUE, 100, &matrix); all_blocks(infile,window,block_cutoff); }