Ejemplo n.º 1
0
/*
 * Open the output file named by out_name.
 *
 * Only the explicit-name case is handled: when out_name is a non-empty
 * string it is handed to open_explicit_file() and that result is returned
 * unchanged.  in_name, prompt and file_extension are accepted for interface
 * compatibility but are not consulted by this implementation.
 *
 * Returns the handle produced by open_explicit_file(), or NULL when no
 * output name was supplied (out_name is NULL or empty).
 */
FILE *  open_output_file(char *in_name, char *prompt, char *file_extension, char *out_name)
{
	/* if the output filename is already specified, just open the file and return */
	if (out_name != NULL && out_name[0] != EOS)
		return open_explicit_file(out_name);

	/*
	 * No explicit name: return a well-defined "no file" value instead of
	 * falling off the end of a non-void function.
	 */
	return NULL;
}
Ejemplo n.º 2
0
int main(int argc,char **argv)
{
	sint i,j,k,n,s;
	sint status;
	sint result_type;
	char c;
	char infile[FILENAMELEN+1];
	char treefile[FILENAMELEN+1];
	float meanid;
	FILE *tree;
	sint maxres,*gapptr=NULL;
	IN_TREEPTR itree;
	double dscore;
	double meanscore;
	double **tmat;
	sint window;
	sint block_cutoff;
	OPT opt;

	if(argc!=2) {
		fprintf(stdout,"Usage: %s input_aln\n",argv[0]);
		exit(1);
	}

	strcpy(infile,argv[1]);

        init_options(&opt);

/* read in the sequences */
	seq_input(infile,opt.explicit_type,FALSE,&mult_aln);
	if(mult_aln.nseqs<=0) {
		error("No sequences in %s\n",infile);
		exit(1);
	}


	window=8;

/* count pairwise residue percent identities */
        tmat = (double **) ckalloc( (mult_aln.nseqs+1) * sizeof (double *) );
        for(i=0;i<mult_aln.nseqs;i++)
                tmat[i] = (double *)ckalloc( (mult_aln.nseqs+1) * sizeof (double) );

	meanscore=0;
        for (i=0,n=0;i<mult_aln.nseqs;i++) {
                for (j=i+1;j<mult_aln.nseqs;j++) {
                        dscore = countid(mult_aln.seqs[i],mult_aln.seqs[j]);
                        tmat[j][i] = tmat[i][j] = (100.0 - dscore)/100.0;
			n++;
			meanscore+=dscore;
                }
        }
	meanscore/=(float)n;

	/*if(mult_aln.nseqs<100) block_cutoff=8;
	else if(mult_aln.nseqs<250) block_cutoff=6;
	else*/ block_cutoff=5;

	if(meanscore>50) block_cutoff=50;

/* make a tree from the percent identities (used for sequence weighting) */
	strcpy(treefile,infile);
	strcat(treefile,".ph");
        if((tree = open_explicit_file(treefile))==NULL) exit(1);

        guide_tree(tree,mult_aln.seqs,mult_aln.nseqs, tmat, QUICKNJ);
        itree=(IN_TREEPTR)ckalloc(sizeof(IN_TREE));

        status = read_tree(treefile, mult_aln.seqs, 0, mult_aln.nseqs,itree);
        for(i=0;i<mult_aln.nseqs;i++)
                ckfree(tmat[i]);
        ckfree(tmat);

        if (status < 0) exit(1);


        seq_weight = calc_seq_weights(0,mult_aln.nseqs,itree,FALSE);
        free_tree(itree);
        remove(treefile);


/* find the start and end positions of each sequence */

	is = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	ie = (sint *)ckalloc((mult_aln.nseqs+1) * sizeof(sint));
	for(s=0;s<mult_aln.nseqs;s++) {
		is[s]=0;
                ie[s] = mult_aln.seqs[s].len;
                for (i=0; i<mult_aln.seqs[s].len; i++) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                is[s]++;
                        else
                                break;
                }
                for (i=mult_aln.seqs[s].len-1; i>=0; i--) {
                        c = mult_aln.seqs[s].data[i];
                        if (!isalpha(c))
                                ie[s]--;
                        else
                                break;
                }
	}

	matrix.format=0;
	maxres = get_cl_matrix(FALSE, gon250mt, gapptr, TRUE, 100, &matrix);

	all_blocks(infile,window,block_cutoff);
}
Ejemplo n.º 3
0
/*
 * Write a parameter file containing a "clustalw" command line that
 * reflects the current option settings.
 *
 * The default output name is the path returned by get_path() for
 * mult_aln.filename with "par" appended.  When get_usemenu() reports that
 * menus are in use, the user is prompted on stdout and may type a
 * different name on stdin; an empty reply keeps the default.
 *
 * Any existing file of that name is removed first.  If the file cannot be
 * opened the function returns without writing anything.
 *
 * opt       option settings to be written out
 * mult_aln  alignment whose file names, sequence counts and DNA/protein
 *           flag select which options are emitted
 */
void create_parameter_output(OPT opt,ALN mult_aln)
{
	char parname[FILENAMELEN+1], temp[FILENAMELEN+1];
	char path[FILENAMELEN+1];
	FILE *parout;
	Boolean usemenu;

	get_path(mult_aln.filename,path);
	strcpy(parname,path);
	/* append the extension without writing past the end of parname */
	strncat(parname,"par",FILENAMELEN - strlen(parname));

	usemenu=get_usemenu();
	if(usemenu) {
		fprintf(stdout,"\nEnter a name for the parameter output file [%s]: ",
		                                   parname);
		/* bounded read; gets() cannot limit how much it writes */
		if (fgets(temp,sizeof temp,stdin) == NULL)
			temp[0] = EOS;
		/* fgets keeps the newline, gets did not */
		temp[strcspn(temp,"\n")] = EOS;
		if(*temp != EOS)
			strcpy(parname,temp);
	}

/* start from a clean file (creating it with execute permissions is disabled) */
	remove(parname);

	if((parout = open_explicit_file(parname))==NULL) return;

	/*
	 * Every option line ends in " \" + newline so the file reads as one
	 * shell command; the space keeps adjacent options from being joined
	 * when the continuation is removed.
	 */
	fprintf(parout,"clustalw \\\n");
	if ((mult_aln.nseqs>0) && (mult_aln.prf1.nseqs<=0)) fprintf(parout,"-infile=%s \\\n",mult_aln.filename);
	if (mult_aln.prf1.nseqs>0) fprintf(parout,"-profile1=%s \\\n",mult_aln.prf1.filename);
	if (mult_aln.prf2.nseqs>0) fprintf(parout,"-profile2=%s \\\n",mult_aln.prf2.filename);
	if (mult_aln.dnaflag == TRUE) fprintf(parout,"-type=dna \\\n");
	else                 fprintf(parout,"-type=protein \\\n");

	/* pairwise alignment parameters */
	if (opt.pw_opt->quick_pairalign) {
		fprintf(parout,"-quicktree \\\n");
		/* DNA input uses the dna_* settings, protein input the prot_* ones */
		if (mult_aln.dnaflag) {
			fprintf(parout,"-ktuple=%d \\\n",(pint)opt.quickpw_opt->dna_ktup);
			fprintf(parout,"-window=%d \\\n",(pint)opt.quickpw_opt->dna_window);
			fprintf(parout,"-pairgap=%d \\\n",(pint)opt.quickpw_opt->dna_wind_gap);
			fprintf(parout,"-topdiags=%d \\\n",(pint)opt.quickpw_opt->dna_signif);
		}
		else {
			fprintf(parout,"-ktuple=%d \\\n",(pint)opt.quickpw_opt->prot_ktup);
			fprintf(parout,"-window=%d \\\n",(pint)opt.quickpw_opt->prot_window);
			fprintf(parout,"-pairgap=%d \\\n",(pint)opt.quickpw_opt->prot_wind_gap);
			fprintf(parout,"-topdiags=%d \\\n",(pint)opt.quickpw_opt->prot_signif);
		}
		if (opt.quickpw_opt->percent) fprintf(parout,"-score=percent \\\n");
		else         fprintf(parout,"-score=absolute \\\n");
	}
	else {
		if (!mult_aln.dnaflag) {
			fprintf(parout,"-pwmatrix=%s \\\n",opt.pw_opt->mtrxname);
			fprintf(parout,"-pwgapopen=%.2f \\\n",opt.pw_opt->prot_go_penalty);
			fprintf(parout,"-pwgapext=%.2f \\\n",opt.pw_opt->prot_ge_penalty);
		}
		else {
			fprintf(parout,"-pwgapopen=%.2f \\\n",opt.pw_opt->dna_go_penalty);
			fprintf(parout,"-pwgapext=%.2f \\\n",opt.pw_opt->dna_ge_penalty);
		}
	}

	/* multiple alignment parameters */
	if (!mult_aln.dnaflag) {
		fprintf(parout,"-matrix=%s \\\n",opt.mult_opt->mtrxname);
		fprintf(parout,"-gapopen=%.2f \\\n",opt.mult_opt->prot_gap_open);
		fprintf(parout,"-gapext=%.2f \\\n",opt.mult_opt->prot_gap_extend);
	}
	else {
		fprintf(parout,"-gapopen=%.2f \\\n",opt.mult_opt->dna_gap_open);
		fprintf(parout,"-gapext=%.2f \\\n",opt.mult_opt->dna_gap_extend);
	}

	fprintf(parout,"-maxdiv=%d \\\n",(pint)opt.mult_opt->divergence_cutoff);
	if (!opt.mult_opt->gap_opt->use_endgaps) fprintf(parout,"-endgaps \\\n");

	if (!mult_aln.dnaflag) {
		if (opt.mult_opt->neg_matrix) fprintf(parout,"-negative \\\n");
		if (opt.mult_opt->gap_opt->no_pref_penalties) fprintf(parout,"-nopgap \\\n");
		if (opt.mult_opt->gap_opt->no_hyd_penalties) fprintf(parout,"-nohgap \\\n");
		if (opt.mult_opt->gap_opt->no_var_penalties) fprintf(parout,"-novgap \\\n");
		fprintf(parout,"-hgapresidues=%s \\\n",opt.mult_opt->gap_opt->hyd_residues);
		fprintf(parout,"-gapdist=%d \\\n",(pint)opt.mult_opt->gap_opt->gap_dist);
	}
	else {
		fprintf(parout,"-transweight=%.2f \\\n",opt.mult_opt->transition_weight);
	}

	/* output format: at most one -output option, first matching flag wins */
	if (opt.alnout_opt->output_gcg) fprintf(parout,"-output=gcg \\\n");
	else if (opt.alnout_opt->output_gde) fprintf(parout,"-output=gde \\\n");
	else if (opt.alnout_opt->output_nbrf) fprintf(parout,"-output=pir \\\n");
	else if (opt.alnout_opt->output_phylip) fprintf(parout,"-output=phylip \\\n");

	if (opt.alnout_opt->output_order==ALIGNED) fprintf(parout,"-outorder=aligned \\\n");
	else                      fprintf(parout,"-outorder=input \\\n");

	/* the case option is only written when GDE output is selected */
	if (opt.alnout_opt->output_gde) {
		if (opt.alnout_opt->lowercase) fprintf(parout,"-case=lower \\\n");
		else           fprintf(parout,"-case=upper \\\n");
	}

	/* last line: no continuation */
	fprintf(parout,"-interactive\n");

	fclose(parout);

}