Beispiel #1
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPFile oldhmmfile = NULL;
    AjPStr      format = NULL;
    AjPFile newhmmfile = NULL;

    /* Housekeeping variables */
    AjPStr        cmd = NULL;
    char       option;
    




    /* ACD file processing */
    embInitPV("ehmmconvert",argc,argv,"HMMERNEW",VERSION);

    oldhmmfile = ajAcdGetInfile("oldhmmfile");
    format     = ajAcdGetListSingle("format");
    newhmmfile = ajAcdGetOutfile("newhmmfile");





    /* MAIN APPLICATION CODE */
    cmd = ajStrNew();


    /* 1. Build hmmconvert command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("hmmconvert"));

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(format);
    if(option == 'A')
	ajStrAppendC(&cmd, " -a ");
    else if(option == 'B')
	ajStrAppendC(&cmd, " -b ");
    else if(option == 'G')
	ajStrAppendC(&cmd, " -p ");
    else if(option == 'X')
	ajStrAppendC(&cmd, " -P ");
    /* -A (append) always set but file will be wiped by EMBOSS first unless 
       append: "Y" is set for "newhmmfile" in the ACD file. */

    /* hmmer cannot append to empty file, so only set the -A (append) option
       if file was opened for appending to (and therefore was not wiped by
       EMBOSS) and is not zero size. */
    if(ajFileIsAppend(newhmmfile) && (ajFilenameGetSize(ajFileGetNameS(newhmmfile))!=-1))
	ajStrAppendC(&cmd, " -A ");
    ajStrAppendC(&cmd, " -F ");
    ajFmtPrintAppS(&cmd, " %s %s", ajFileGetNameC(oldhmmfile), ajFileGetNameC(newhmmfile));


    /* 2. Close ACD files. */
    ajFileClose(&oldhmmfile);
    ajFileClose(&newhmmfile);


    /* 3. Call hmmconvert */
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysExecS(cmd);


    /* 4. Exit cleanly */
    ajStrDel(&cmd);
    ajStrDel(&format);

    embExit();

    return 0;
}
Beispiel #2
0
int main(int argc, char **argv)
{

    AjPSeqall seqall = NULL;
    AjPFile dend_outfile = NULL;
    AjPStr tmp_dendfilename = NULL;
    AjPFile tmp_dendfile = NULL;

    AjPStr tmp_aln_outfile = NULL;
    AjPSeqset seqset = NULL;
    AjPSeqout seqout = NULL;
    AjPSeqin  seqin  = NULL;

    AjBool only_dend;
    AjBool are_prot = ajFalse;
    AjBool do_slow;
    AjBool use_dend;
    AjPFile dend_file = NULL;
    AjPStr dend_filename = NULL;

    ajint ktup;
    ajint gapw;
    ajint topdiags;
    ajint window;
    AjBool nopercent;

    AjPStr pw_matrix = NULL;
    AjPStr pw_dna_matrix  = NULL;
    AjPFile pairwise_matrix = NULL;
    float pw_gapc;
    float pw_gapv;

    AjPStr pwmstr = NULL;
    char   pwmc   = '\0';
    AjPStr pwdstr = NULL;
    char   pwdc   = '\0';

    AjPStr m1str = NULL;
    AjPStr m2str = NULL;
    char   m1c   = '\0';
    char   m2c   = '\0';

    AjPStr matrix = NULL;
    AjPStr dna_matrix = NULL;
    AjPFile ma_matrix = NULL;
    float gapc;
    float gapv;
    AjBool endgaps;
    AjBool norgap;
    AjBool nohgap;
    ajint gap_dist;
    ajint maxdiv;
    AjPStr hgapres = NULL;


    AjPSeqout fil_file = NULL;
    AjPSeq seq = NULL;

    AjPStr cmd = NULL;
    AjPStr tmp = NULL;
    AjPStr tmpFilename;
    AjPStr line = NULL;
    ajint nb = 0;


    /* get all the parameters */

    embInit("emma", argc, argv);

    pwmstr = ajStrNew();
    pwdstr = ajStrNew();
    m1str  = ajStrNew();
    m2str  = ajStrNew();


    seqall = ajAcdGetSeqall("sequence");
    seqout = ajAcdGetSeqoutset("outseq");

    dend_outfile = ajAcdGetOutfile("dendoutfile");

    only_dend = ajAcdGetToggle("onlydend");
    use_dend  = ajAcdGetToggle("dendreuse");
    dend_file = ajAcdGetInfile("dendfile");
    if (dend_file)
	ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file));
    ajFileClose(&dend_file);

    do_slow = ajAcdGetToggle("slowalign");

    ktup      = ajAcdGetInt("ktup");
    gapw      = ajAcdGetInt("gapw");
    topdiags  = ajAcdGetInt("topdiags");
    window    = ajAcdGetInt("window");
    nopercent = ajAcdGetBoolean("nopercent");

    pw_matrix = ajAcdGetListSingle("pwmatrix");
    pwmc = ajStrGetCharFirst(pw_matrix);

    if(pwmc=='b')
	ajStrAssignC(&pwmstr,"blosum");
    else if(pwmc=='p')
	ajStrAssignC(&pwmstr,"pam");
    else if(pwmc=='g')
	ajStrAssignC(&pwmstr,"gonnet");
    else if(pwmc=='i')
	ajStrAssignC(&pwmstr,"id");
    else if(pwmc=='o')
	ajStrAssignC(&pwmstr,"own");


    pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix");
    pwdc = ajStrGetCharFirst(pw_dna_matrix);

    if(pwdc=='i')
	ajStrAssignC(&pwdstr,"iub");
    else if(pwdc=='c')
	ajStrAssignC(&pwdstr,"clustalw");
    else if(pwdc=='o')
	ajStrAssignC(&pwdstr,"own");

    pairwise_matrix = ajAcdGetInfile("pairwisedatafile");

    pw_gapc = ajAcdGetFloat( "pwgapopen");
    pw_gapv = ajAcdGetFloat( "pwgapextend");

    matrix = ajAcdGetListSingle( "matrix");
    m1c = ajStrGetCharFirst(matrix);

    if(m1c=='b')
	ajStrAssignC(&m1str,"blosum");
    else if(m1c=='p')
	ajStrAssignC(&m1str,"pam");
    else if(m1c=='g')
	ajStrAssignC(&m1str,"gonnet");
    else if(m1c=='i')
	ajStrAssignC(&m1str,"id");
    else if(m1c=='o')
	ajStrAssignC(&m1str,"own");


    dna_matrix = ajAcdGetListSingle( "dnamatrix");
    m2c = ajStrGetCharFirst(dna_matrix);

    if(m2c=='i')
	ajStrAssignC(&m2str,"iub");
    else if(m2c=='c')
	ajStrAssignC(&m2str,"clustalw");
    else if(m2c=='o')
	ajStrAssignC(&m2str,"own");


    ma_matrix = ajAcdGetInfile("mamatrixfile");
    gapc      = ajAcdGetFloat("gapopen");
    gapv      = ajAcdGetFloat("gapextend");
    endgaps   = ajAcdGetBoolean("endgaps");
    norgap    = ajAcdGetBoolean("norgap");
    nohgap    = ajAcdGetBoolean("nohgap");
    gap_dist  = ajAcdGetInt("gapdist");
    hgapres   = ajAcdGetString("hgapres");
    maxdiv    = ajAcdGetInt("maxdiv");

    tmp = ajStrNewC("fasta");

    /*
    ** Start by writing sequences into a unique temporary file
    ** get file pointer to unique file
    */


    fil_file = ajSeqoutNew();
    tmpFilename = emma_getUniqueFileName();
    if(!ajSeqoutOpenFilename( fil_file, tmpFilename))
	embExitBad();

    /* Set output format to fasta */
    ajSeqoutSetFormatS( fil_file, tmp);

    while(ajSeqallNext(seqall, &seq))
    {
        /*
        **  Check sequences are all of the same type
        **  Still to be done
        **  Write out sequences
        */
	if (!nb)
	    are_prot  = ajSeqIsProt(seq);
        ajSeqoutWriteSeq(fil_file, seq);
	++nb;
    }
    ajSeqoutClose(fil_file);

    if(nb < 2)
	ajFatal("Multiple alignments need at least two sequences");

    /* Generate clustalw command line */
    cmd = ajStrNewS(ajAcdGetpathC("clustalw"));

    /* add tmp file containing sequences */
    ajStrAppendC(&cmd, " -infile=");
    ajStrAppendS(&cmd, tmpFilename);

    /* add out file name */
    tmp_aln_outfile = emma_getUniqueFileName();
    ajStrAppendC(&cmd, " -outfile=");
    ajStrAppendS(&cmd, tmp_aln_outfile);


    /* calculating just the nj tree or doing full alignment */
    if(only_dend)
        ajStrAppendC(&cmd, " -tree");
    else
        if(!use_dend)
	    ajStrAppendC(&cmd, " -align");

    /* Set sequence type from information from acd file */
    if(are_prot)
        ajStrAppendC(&cmd, " -type=protein");
    else
        ajStrAppendC(&cmd, " -type=dna");


    /*
    **  set output to MSF format - will read in this file later and output
    **  user requested format
    */
    ajStrAppendC(&cmd, " -output=");
    ajStrAppendC(&cmd, "gcg");

    /* If going to do pairwise alignment */
    if(!use_dend)
    {
        /* add fast pairwise alignments*/
        if(!do_slow)
        {
            ajStrAppendC(&cmd, " -quicktree");
            ajStrAppendC(&cmd, " -ktuple=");
            ajStrFromInt(&tmp, ktup);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -window=");
            ajStrFromInt(&tmp, window);
            ajStrAppendS(&cmd, tmp);
            if(nopercent)
                ajStrAppendC(&cmd, " -score=percent");
            else
                ajStrAppendC(&cmd, " -score=absolute");
            ajStrAppendC(&cmd, " -topdiags=");
            ajStrFromInt(&tmp, topdiags);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pairgap=");
            ajStrFromInt(&tmp, gapw);
            ajStrAppendS(&cmd, tmp);
        }
        else
        {
            if(pairwise_matrix)
            {
		if(are_prot)
		    ajStrAppendC(&cmd, " -pwmatrix=");
		else
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix));
            }
            else
            {
		if(are_prot)
		{
		    ajStrAppendC(&cmd, " -pwmatrix=");
		    ajStrAppendS(&cmd, pwmstr);
		}
		else
		{
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		    ajStrAppendS(&cmd, pwdstr);
		}
            }
            ajStrAppendC(&cmd, " -pwgapopen=");
            ajStrFromFloat(&tmp, pw_gapc, 3);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pwgapext=");
            ajStrFromFloat(&tmp, pw_gapv, 3);
            ajStrAppendS(&cmd, tmp);
        }
    }

    /* Multiple alignments */

    /* using existing tree or generating new tree? */
    if(use_dend)
    {
        ajStrAppendC(&cmd, " -usetree=");
        ajStrAppendS(&cmd, dend_filename);
    }
    else
    {
	/* use tmp file to hold dend file, will read back in later */
	tmp_dendfilename = emma_getUniqueFileName();
        ajStrAppendC(&cmd, " -newtree=");
        ajStrAppendS(&cmd, tmp_dendfilename);
    }

    if(ma_matrix)
    {
	if(are_prot)
	    ajStrAppendC(&cmd, " -matrix=");
	else
	    ajStrAppendC(&cmd, " -pwmatrix=");
	ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix));
    }
    else
    {
	if(are_prot)
	{
	    ajStrAppendC(&cmd, " -matrix=");
	    ajStrAppendS(&cmd, m1str);
	}
	else
	{
	    ajStrAppendC(&cmd, " -dnamatrix=");
	    ajStrAppendS(&cmd, m2str);
	}
    }

    ajStrAppendC(&cmd, " -gapopen=");
    ajStrFromFloat(&tmp, gapc, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapext=");
    ajStrFromFloat(&tmp, gapv, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapdist=");
    ajStrFromInt(&tmp, gap_dist);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -hgapresidues=");
    ajStrAppendS(&cmd, hgapres);

    if(!endgaps)
	ajStrAppendC(&cmd, " -endgaps");

    if(norgap)
	ajStrAppendC(&cmd, " -nopgap");

    if(nohgap)
	ajStrAppendC(&cmd, " -nohgap");

    ajStrAppendC(&cmd, " -maxdiv=");
    ajStrFromInt(&tmp, maxdiv);
    ajStrAppendS(&cmd, tmp);


    /*  run clustalw */

/*    ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */
    ajDebug("Executing '%S'\n", cmd);

    ajSysExecS(cmd);

    /* produce alignment file only if one was produced */
    if(!only_dend)
    {
	/* read in tmp alignment output file to output through EMBOSS output */

	seqin = ajSeqinNew();
	/*
	**  add the Usa format to the start of the filename to tell EMBOSS
	**  format of file
	*/
	ajStrInsertC(&tmp_aln_outfile, 0, "msf::");
	ajSeqinUsa(&seqin, tmp_aln_outfile);
	seqset = ajSeqsetNew();
	if(ajSeqsetRead(seqset, seqin))
	{
	    ajSeqoutWriteSet(seqout, seqset);


	    ajSeqoutClose(seqout);
	    ajSeqinDel(&seqin);

	    /* remove the Usa from the start of the string */
	    ajStrCutStart(&tmp_aln_outfile, 5);
	}
	else
	    ajFmtError("Problem writing out EMBOSS alignment file\n");
    }


    /* read in new tmp dend file (if produced) to output through EMBOSS */
    if(tmp_dendfilename!=NULL)
    {
	tmp_dendfile = ajFileNewInNameS( tmp_dendfilename);

	if(tmp_dendfile!=NULL){
	while(ajReadlineTrim(tmp_dendfile, &line))
	    ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line));

	ajFileClose(&tmp_dendfile);
	ajSysFileUnlinkS(tmp_dendfilename);
    }
    }


    ajSysFileUnlinkS(tmpFilename);

    if(!only_dend)
	ajSysFileUnlinkS(tmp_aln_outfile);

    ajStrDel(&pw_matrix);
    ajStrDel(&matrix);
    ajStrDel(&pw_dna_matrix);
    ajStrDel(&dna_matrix);
    ajStrDel(&tmp_dendfilename);
    ajStrDel(&dend_filename);
    ajStrDel(&tmp_aln_outfile);
    ajStrDel(&pwmstr);
    ajStrDel(&pwdstr);
    ajStrDel(&m1str);
    ajStrDel(&m2str);
    ajStrDel(&hgapres);
    ajStrDel(&cmd);
    ajStrDel(&tmp);
    ajStrDel(&tmpFilename);
    ajStrDel(&line);

    ajFileClose(&dend_outfile);
    ajFileClose(&tmp_dendfile);
    ajFileClose(&dend_file);
    ajFileClose(&pairwise_matrix);
    ajFileClose(&ma_matrix);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajSeqoutDel(&fil_file);
    ajSeqinDel(&seqin);

    embExit();

    return 0;
}
Beispiel #3
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPFile    hmmfile = NULL;     
    AjPSeqset  seqfile = NULL;     
    AjPFile     mapali = NULL;     
    AjPFile    withali = NULL;     
    AjPAlign         o = NULL;     
    AjBool           m = ajFalse;
    AjBool           q = ajFalse;

    /* Housekeeping variables */
    AjPStr        cmd = NULL;
    AjPStr        fmt = NULL;
    AjBool      fmtok = ajFalse;
    AjPStr        rnd = NULL;      
    AjPSeqout    rndo = NULL;      
    


    /* ACD file processing */
    embInitPV("ehmmalign",argc,argv,"HMMERNEW",VERSION);

    hmmfile = ajAcdGetInfile("hmmfile");
    seqfile = ajAcdGetSeqset("seqfile");
    mapali  = ajAcdGetInfile("mapali");
    withali = ajAcdGetInfile("withali");
    o       = ajAcdGetAlign("o");
    m       = ajAcdGetBoolean("m");
    q       = ajAcdGetBoolean("q");





    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd  = ajStrNew();
    fmt  = ajStrNew();
    rnd  = ajStrNew();



    
    /* 2. Re-write seqfile to a temporary file in a format (fasta) HMMER can understand.
       We cannot just pass the name of seqfile to HMMER as the name provided might be a 
       USA which HMMER would not understand. */
    ajFilenameSetTempname(&rnd);
    rndo = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(rndo, rnd))
	ajFatal("Terminal ajSeqFileNewOut failure. Email EMBOSS helpdesk!\n");
    ajSeqoutSetFormatC(rndo, "fasta");
    ajSeqoutWriteSet(rndo, seqfile);
    ajSeqoutClose(rndo);
    ajSeqoutDel(&rndo);


    /* 3. Build hmmalign command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajFmtPrintS(&cmd, "%S ", ajAcdGetpathC("hmmalign"));
    if(mapali)
	ajFmtPrintAppS(&cmd, " --mapali %s ", ajFileGetNameC(mapali));
    if(withali)
	ajFmtPrintAppS(&cmd, " --withali %s ", ajFileGetNameC(withali));
    if(m)
	ajStrAppendC(&cmd, " -m ");
    if(q)
	ajStrAppendC(&cmd, " -q ");


    /* Ensure output alignment is in user-specified format. */
    fmtok=ajTrue;
    ajStrAssignS(&fmt, ajAlignGetFormat(o));
    /* fasta and a2m are identical formats. */
    if(ajStrMatchC(fmt, "fasta"))
	ajStrAssignC(&fmt, "A2M");
    else if(ajStrMatchC(fmt, "a2m"))
	ajStrAssignC(&fmt, "A2M");
    else if(ajStrMatchC(fmt, "msf"))
	ajStrAssignC(&fmt, "MSF");
    else if(ajStrMatchC(fmt, "phylip"))
	ajStrAssignC(&fmt, "PHYLIP");
    /* hmmer also supports stockholm, SELEX & Clustal output, EMBOSS does not.
       EMBOSS supports unknown/multiple/simple and srs output, hmmer does not. */ 
    else
	fmtok = ajFalse;

    if(!fmtok)
    {
	/* This could be replaced with code to reformat the file. */
	ajWarn("Specified output alignment format ('o' ACD option) is "
	       "not understood by HMMER.  Using stockholm format instead.");
	ajStrAssignC(&fmt, "Stockholm");
    }
       

    /* rnd is the name of the rewritten seqfile.  MUST specify FASTA format explicitly. */
    ajFmtPrintAppS(&cmd, " --informat FASTA --outformat %S  -o %s %s %S", 
		   fmt,
		   ajAlignGetFilename(o),
		   ajFileGetNameC(hmmfile),
		   rnd);
            
    /* 4. Close ACD files */
    ajFileClose(&hmmfile);    
    ajSeqsetDel(&seqfile);
    ajFileClose(&mapali);
    ajFileClose(&withali);
    ajAlignClose(o);
    ajAlignDel(&o);

    
    /* 5. Call hmmalign */
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysExecS(cmd);


    /* 6. Exit cleanly */
    ajSysFileUnlinkS(rnd); 
    
    ajStrDel(&cmd);
    ajStrDel(&fmt);
    ajStrDel(&rnd);
    embExit();

    return 0;
}
Beispiel #4
0
/* @funcstatic domainalign_stamp **********************************************
**
** Call STAMP and process files.
**
** @param [r] prevdomain [AjPDomain] Previous domain.
** @param [r] domain [AjPDomain] This domain.
** @param [r] daf [AjPDirout] Domain alignment files.
** @param [r] super [AjPDirout] Superimposition files.
** @param [r] singlets [AjPDirout]  Singlet files.
** @param [r] align [AjPStr]   Align.
** @param [r] alignc [AjPStr] Alignc.
** @param [r] dom [AjPStr]   Dom.
** @param [r] name [AjPStr] Name.
** @param [r] set [AjPStr] Name of set file.
** @param [r] scan [AjPStr] Name of scan file.
** @param [r] sort [AjPStr] Name of sort file.
** @param [r] log [AjPStr] Log file name.
** @param [r] out [AjPStr] Out file name.
** @param [r] keepsinglets [AjBool] Keep singlet sequences or not.
** @param [r] moden [ajint] Mode number.
** @param [r] noden [ajint] Node number.
** @param [r] nset [ajint] Number in set.
** @param [r] logf [AjPFile] Lof file.
** 
**
** @return [void] True on success
** @@
****************************************************************************/
static void domainalign_stamp(AjPDomain prevdomain,
			      AjPDomain domain, 
			      AjPDirout daf, 
			      AjPDirout super,
			      AjPDirout singlets, 
			      AjPStr    align, 
			      AjPStr    alignc, 
			      AjPStr    dom, 
			      AjPStr    name, 
			      AjPStr    set, 
			      AjPStr    scan, 
			      AjPStr    sort, 
			      AjPStr    log, 
			      AjPStr    out, 
			      AjBool    keepsinglets, 
			      ajint     moden, 
			      ajint     noden,
			      ajint     nset, 
			      AjPFile   logf)
{
    AjPStr    exec      = NULL;	/* The UNIX command line to be executed.   */
    AjPFile   clusterf  = NULL;	/* File pointer for log file.              */
    ajint     ncluster  = 0;	/* Counter for the number of clusters.     */
    AjPStr    line      = NULL;	/* Holds a line from the log file.         */
    AjPRegexp rexp      = NULL;	/* For parsing no. of clusters in log file */
    AjPStr    temp      = NULL;	/* A temporary string.                     */
    ajint     x         = 0;    /* Loop counter.                           */
    

    exec     = ajStrNew();
    line     = ajStrNew();
    temp     = ajStrNew();



    rexp     = ajRegCompC("^(Cluster:)");

    ajDebug("domainalign_stamp name: '%S'\n", name);
    
    /* Call STAMP. */
    ajFmtPrintS(&exec,	"%S -l %S -s -n 2 -slide 5 -prefix %S -d %S",
		ajAcdGetpathC("stamp"), dom, name, set);
    ajFmtPrint("\n%S\n\n", exec);
    ajSysExecS(exec);  

    ajFmtPrintS(&exec, "%S -f %S -s Sc 2.5",
		ajAcdGetpathC("sorttrans"), scan);
    ajFmtPrint("\n%S > %S\n\n", exec, sort);

    ajSysExecOutnameS(exec, sort);

    ajFmtPrintS(&exec, "%S -l %S -prefix %S",
		ajAcdGetpathC("stamp"), sort, name);
    ajFmtPrint("\n%S > %S\n\n", exec, log);
    ajSysExecOutnameS(exec, log);
	
    ajFmtPrintS(&exec, "%S -f %S -g  -o %S",
		ajAcdGetpathC("transform"), sort, alignc);
    ajFmtPrint("\n%S\n\n", exec);
    ajSysExecS(exec);
    
    
    /* Count the number of clusters in the log file. */
    if(!(clusterf=ajFileNewInNameS(log)))
	ajFatal("Could not open log file '%S'\n", log);
    ncluster=0;
    while(ajReadlineTrim(clusterf,&line))
	if(ajRegExec(rexp,line))
	    ncluster++;
    ajFileClose(&clusterf);	

    ajDebug("ncluster: %d\n", ncluster);
    
    /* Call STAMP ... calculate two fields for structural equivalence using 
       threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */
    ajFmtPrintS(&exec,"%S -f %S.%d -min 0.5",
		ajAcdGetpathC("poststamp"), name, ncluster);
    ajFmtPrint("%S\n\n", exec);
    ajSysExecS(exec);
    
    
    /* Call STAMP ... convert block format alignment into clustal format. */
    ajFmtPrintS(&exec,"%S -f %S.%d.post",
		ajAcdGetpathC("ver2hor"), name, ncluster); 
    ajFmtPrint("%S > %S\n\n", exec, out);
    ajSysExecOutnameS(exec, out);
    
    
    /* Process STAMP alignment file and generate alignment file for output. */
    domainalign_ProcessStampFile(out, align, prevdomain, noden, logf);
    
    
    /* Remove all temporary files. */
    
    for(x=1;x<ncluster+1;x++)
    {
	ajFmtPrintS(&temp, "%S.%d", name, x);
	ajSysFileUnlinkS(temp); 
    }
    
    ajFmtPrintS(&temp, "%S.%d.post", name, ncluster);
    ajSysFileUnlinkS(temp); 

    ajStrDel(&exec);
    ajStrDel(&line);
    ajStrDel(&temp);
    ajRegFree(&rexp);

    return;
}   
Beispiel #5
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPSeqset alignfile = NULL; 
    AjPFile       prior = NULL;
    AjPFile        null = NULL;
    AjPFile         pam = NULL;
    float        pamwgt = 0.0;
    AjPStr         nhmm = NULL;
    AjPStr     strategy = NULL;
    ajint      pbswitch = 0;
    float       archpri = 0.0;
    AjBool      binary  = ajFalse;
    AjBool         fast = ajFalse;
    float        gapmax = 0.0;
    AjBool         hand = ajFalse;
    float       idlevel = 0.0;
    AjBool        noeff = ajFalse;
    float       swentry = 0.0;
    float        swexit = 0.0;
    AjBool    verbosity = ajFalse;
    AjPStr    weighting = NULL;
    AjPFile     hmmfile = NULL;
    AjPFile           o = NULL;
    AjPFile       cfile = NULL;

    /* Housekeeping variables */
    AjPStr          cmd = NULL;
    AjPStr         rnd1 = NULL;
    AjPStr         rnd2 = NULL;
    AjPStr          fmt = NULL;
    char         option;
    AjBool        fmtok = ajFalse;
    AjPStr  hmmfilename = NULL;
    




    /* ACD file processing */
    embInitPV("ehmmbuild",argc,argv,"HMMERNEW",VERSION);

    alignfile = ajAcdGetSeqset("alignfile");
    prior     = ajAcdGetInfile("prior");
    null      = ajAcdGetInfile("null");
    pam       = ajAcdGetInfile("pam");
    pamwgt    = ajAcdGetFloat("pamwgt");
    nhmm      = ajAcdGetString("nhmm");
    strategy  = ajAcdGetListSingle("strategy");
    pbswitch  = ajAcdGetInt("pbswitch");
    archpri   = ajAcdGetFloat("archpri");
    binary    = ajAcdGetBoolean("binary");
    fast      = ajAcdGetBoolean("fast");
    gapmax    = ajAcdGetFloat("gapmax");
    hand      = ajAcdGetBoolean("hand");
    idlevel   = ajAcdGetFloat("sidlevel");
    noeff     = ajAcdGetBoolean("noeff");
    swentry   = ajAcdGetFloat("swentry");
    swexit    = ajAcdGetFloat("swexit");
    verbosity = ajAcdGetBoolean("verbosity");
    weighting = ajAcdGetListSingle("weighting");
    hmmfile   = ajAcdGetOutfile("hmmfile");
    o         = ajAcdGetOutfile("o");
    cfile     = ajAcdGetOutfile("cfile");





    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd  = ajStrNew();
    rnd1 = ajStrNew();
    rnd2 = ajStrNew();
    fmt  = ajStrNew();
    hmmfilename = ajStrNew();

    ajStrAssignC(&hmmfilename, ajFileGetNameC(hmmfile));


    /* 2. Ensure alignfile is in format HMMER can understand.  These include
       FASTA, GENBANK,EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL and PHYLIP.
       EMBOSS name definitions are taken from seqInFormatDef in ajseqread.c and
       seqOutFormat in ajseqwrite.c */
    fmtok=ajFalse;
    ajStrAssignS(&fmt, ajSeqsetGetFormat(alignfile));
    if(ajStrMatchC(fmt, "fasta")    ||
       ajStrMatchC(fmt, "genbank")  ||
       ajStrMatchC(fmt, "embl")     ||
       ajStrMatchC(fmt, "gcg")      ||
       ajStrMatchC(fmt, "pir")      ||
       ajStrMatchC(fmt, "stockholm")||
       ajStrMatchC(fmt, "selex")    ||
       ajStrMatchC(fmt, "msf")      ||
       ajStrMatchC(fmt, "clustal")  ||
       ajStrMatchC(fmt, "phylip"))
	fmtok = ajTrue;
    /* This could be replaced with code to reformat the file. */
    if(!fmtok)
	ajFatal("Input alignment ('alignfile' ACD option) is not in format "
		"HMMER understands. Please use a a file in FASTA, GENBANK, "
		"EMBL, GCG, PIR, STOCKHOLM, SELEX, MSF,CLUSTAL or PHYLIP format.");
    

    /* 3. Build hmmbuild command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("hmmbuild"));
    if(prior)
	ajFmtPrintAppS(&cmd, " --prior %s ", ajFileGetNameC(prior));
    if(null)
	ajFmtPrintS(&cmd, " --null %s ", ajFileGetNameC(null));
    if(pam)
	ajFmtPrintAppS(&cmd, " --pam %s  --pamwgt %f ", ajFileGetNameC(pam), pamwgt);
    ajFmtPrintAppS(&cmd, " -n %S ", nhmm);

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(strategy);
    if(option == 'F')
	ajStrAppendC(&cmd, " -f ");
    else if(option == 'G')
	ajStrAppendC(&cmd, " -g ");
    else if(option == 'S')
	ajStrAppendC(&cmd, " -s ");
    /* else go with default ('D' option in ACD file) */
    ajFmtPrintAppS(&cmd, " --pbswitch %d ", pbswitch);
    ajFmtPrintAppS(&cmd, " --archpri %f ", archpri);
    if(binary)
	ajStrAppendC(&cmd, " --binary ");
    if(fast)
	ajFmtPrintAppS(&cmd, " --fast --gapmax %f ", gapmax);
    if(hand)
	ajStrAppendC(&cmd, " --hand ");
    ajFmtPrintAppS(&cmd, " --idlevel %f ", idlevel);
    if(noeff)
	ajStrAppendC(&cmd, " --noeff ");
    ajFmtPrintAppS(&cmd, " --swentry %f ", swentry);
    ajFmtPrintAppS(&cmd, " --swexit %f ", swexit);
    if(verbosity)
	ajStrAppendC(&cmd, " --verbose ");

    /* ACD option only allows one selection */
    option = ajStrGetCharFirst(weighting);
    if(option == 'B')
	ajStrAppendC(&cmd, " --wblosum ");
    else if(option == 'G')
	ajStrAppendC(&cmd, " --wgsc ");
    else if(option == 'K')
	ajStrAppendC(&cmd, " --wme ");
    else if(option == 'W')
	ajStrAppendC(&cmd, " --wpb ");
    else if(option == 'V')
	ajStrAppendC(&cmd, " --wvoronoi ");
    else if(option == 'N')
	ajStrAppendC(&cmd, " --wnone ");
    if(o)
	ajFmtPrintAppS(&cmd, " -o %s ", ajFileGetNameC(o));
    if(cfile)
	ajFmtPrintAppS(&cmd, " --cfile %s ", ajFileGetNameC(cfile));
    /* -A (append) always set but file will be wiped by EMBOSS first unless 
       append: "Y" is set for "hmmfile" in the ACD file. */
    ajStrAppendC(&cmd, " -A -F ");
    ajFmtPrintAppS(&cmd, " %S %S", hmmfilename, ajSeqsetGetFilename(alignfile));


    /* 4. Close ACD files */
    ajSeqsetDel(&alignfile);
    ajFileClose(&prior);
    ajFileClose(&null);
    ajFileClose(&pam);
    ajFileClose(&hmmfile);
    ajFileClose(&o);
    ajFileClose(&cfile);


    /* 5. Call hmmbuild */
    ajFmtPrint("\n%S\n", cmd); 
    ajSysExecS(cmd);    


    /* 6. Exit cleanly */
    ajStrDel(&nhmm);
    ajStrDel(&cmd);
    ajStrDel(&rnd1);
    ajStrDel(&rnd2);
    ajStrDel(&fmt);
    ajStrDel(&hmmfilename);
    ajStrDel(&strategy);
    ajStrDel(&weighting);
    
    embExit();

    return 0;
}