Example #1
0
int main(int argc, char **argv)
{

    AjPSeqall seqall = NULL;
    AjPFile dend_outfile = NULL;
    AjPStr tmp_dendfilename = NULL;
    AjPFile tmp_dendfile = NULL;

    AjPStr tmp_aln_outfile = NULL;
    AjPSeqset seqset = NULL;
    AjPSeqout seqout = NULL;
    AjPSeqin  seqin  = NULL;

    AjBool only_dend;
    AjBool are_prot = ajFalse;
    AjBool do_slow;
    AjBool use_dend;
    AjPFile dend_file = NULL;
    AjPStr dend_filename = NULL;

    ajint ktup;
    ajint gapw;
    ajint topdiags;
    ajint window;
    AjBool nopercent;

    AjPStr pw_matrix = NULL;
    AjPStr pw_dna_matrix  = NULL;
    AjPFile pairwise_matrix = NULL;
    float pw_gapc;
    float pw_gapv;

    AjPStr pwmstr = NULL;
    char   pwmc   = '\0';
    AjPStr pwdstr = NULL;
    char   pwdc   = '\0';

    AjPStr m1str = NULL;
    AjPStr m2str = NULL;
    char   m1c   = '\0';
    char   m2c   = '\0';

    AjPStr matrix = NULL;
    AjPStr dna_matrix = NULL;
    AjPFile ma_matrix = NULL;
    float gapc;
    float gapv;
    AjBool endgaps;
    AjBool norgap;
    AjBool nohgap;
    ajint gap_dist;
    ajint maxdiv;
    AjPStr hgapres = NULL;


    AjPSeqout fil_file = NULL;
    AjPSeq seq = NULL;

    AjPStr cmd = NULL;
    AjPStr tmp = NULL;
    AjPStr tmpFilename;
    AjPStr line = NULL;
    ajint nb = 0;


    /* get all the parameters */

    embInit("emma", argc, argv);

    pwmstr = ajStrNew();
    pwdstr = ajStrNew();
    m1str  = ajStrNew();
    m2str  = ajStrNew();


    seqall = ajAcdGetSeqall("sequence");
    seqout = ajAcdGetSeqoutset("outseq");

    dend_outfile = ajAcdGetOutfile("dendoutfile");

    only_dend = ajAcdGetToggle("onlydend");
    use_dend  = ajAcdGetToggle("dendreuse");
    dend_file = ajAcdGetInfile("dendfile");
    if (dend_file)
	ajStrAssignS(&dend_filename, ajFileGetPrintnameS(dend_file));
    ajFileClose(&dend_file);

    do_slow = ajAcdGetToggle("slowalign");

    ktup      = ajAcdGetInt("ktup");
    gapw      = ajAcdGetInt("gapw");
    topdiags  = ajAcdGetInt("topdiags");
    window    = ajAcdGetInt("window");
    nopercent = ajAcdGetBoolean("nopercent");

    pw_matrix = ajAcdGetListSingle("pwmatrix");
    pwmc = ajStrGetCharFirst(pw_matrix);

    if(pwmc=='b')
	ajStrAssignC(&pwmstr,"blosum");
    else if(pwmc=='p')
	ajStrAssignC(&pwmstr,"pam");
    else if(pwmc=='g')
	ajStrAssignC(&pwmstr,"gonnet");
    else if(pwmc=='i')
	ajStrAssignC(&pwmstr,"id");
    else if(pwmc=='o')
	ajStrAssignC(&pwmstr,"own");


    pw_dna_matrix = ajAcdGetListSingle("pwdnamatrix");
    pwdc = ajStrGetCharFirst(pw_dna_matrix);

    if(pwdc=='i')
	ajStrAssignC(&pwdstr,"iub");
    else if(pwdc=='c')
	ajStrAssignC(&pwdstr,"clustalw");
    else if(pwdc=='o')
	ajStrAssignC(&pwdstr,"own");

    pairwise_matrix = ajAcdGetInfile("pairwisedatafile");

    pw_gapc = ajAcdGetFloat( "pwgapopen");
    pw_gapv = ajAcdGetFloat( "pwgapextend");

    matrix = ajAcdGetListSingle( "matrix");
    m1c = ajStrGetCharFirst(matrix);

    if(m1c=='b')
	ajStrAssignC(&m1str,"blosum");
    else if(m1c=='p')
	ajStrAssignC(&m1str,"pam");
    else if(m1c=='g')
	ajStrAssignC(&m1str,"gonnet");
    else if(m1c=='i')
	ajStrAssignC(&m1str,"id");
    else if(m1c=='o')
	ajStrAssignC(&m1str,"own");


    dna_matrix = ajAcdGetListSingle( "dnamatrix");
    m2c = ajStrGetCharFirst(dna_matrix);

    if(m2c=='i')
	ajStrAssignC(&m2str,"iub");
    else if(m2c=='c')
	ajStrAssignC(&m2str,"clustalw");
    else if(m2c=='o')
	ajStrAssignC(&m2str,"own");


    ma_matrix = ajAcdGetInfile("mamatrixfile");
    gapc      = ajAcdGetFloat("gapopen");
    gapv      = ajAcdGetFloat("gapextend");
    endgaps   = ajAcdGetBoolean("endgaps");
    norgap    = ajAcdGetBoolean("norgap");
    nohgap    = ajAcdGetBoolean("nohgap");
    gap_dist  = ajAcdGetInt("gapdist");
    hgapres   = ajAcdGetString("hgapres");
    maxdiv    = ajAcdGetInt("maxdiv");

    tmp = ajStrNewC("fasta");

    /*
    ** Start by writing sequences into a unique temporary file
    ** get file pointer to unique file
    */


    fil_file = ajSeqoutNew();
    tmpFilename = emma_getUniqueFileName();
    if(!ajSeqoutOpenFilename( fil_file, tmpFilename))
	embExitBad();

    /* Set output format to fasta */
    ajSeqoutSetFormatS( fil_file, tmp);

    while(ajSeqallNext(seqall, &seq))
    {
        /*
        **  Check sequences are all of the same type
        **  Still to be done
        **  Write out sequences
        */
	if (!nb)
	    are_prot  = ajSeqIsProt(seq);
        ajSeqoutWriteSeq(fil_file, seq);
	++nb;
    }
    ajSeqoutClose(fil_file);

    if(nb < 2)
	ajFatal("Multiple alignments need at least two sequences");

    /* Generate clustalw command line */
    cmd = ajStrNewS(ajAcdGetpathC("clustalw"));

    /* add tmp file containing sequences */
    ajStrAppendC(&cmd, " -infile=");
    ajStrAppendS(&cmd, tmpFilename);

    /* add out file name */
    tmp_aln_outfile = emma_getUniqueFileName();
    ajStrAppendC(&cmd, " -outfile=");
    ajStrAppendS(&cmd, tmp_aln_outfile);


    /* calculating just the nj tree or doing full alignment */
    if(only_dend)
        ajStrAppendC(&cmd, " -tree");
    else
        if(!use_dend)
	    ajStrAppendC(&cmd, " -align");

    /* Set sequence type from information from acd file */
    if(are_prot)
        ajStrAppendC(&cmd, " -type=protein");
    else
        ajStrAppendC(&cmd, " -type=dna");


    /*
    **  set output to MSF format - will read in this file later and output
    **  user requested format
    */
    ajStrAppendC(&cmd, " -output=");
    ajStrAppendC(&cmd, "gcg");

    /* If going to do pairwise alignment */
    if(!use_dend)
    {
        /* add fast pairwise alignments*/
        if(!do_slow)
        {
            ajStrAppendC(&cmd, " -quicktree");
            ajStrAppendC(&cmd, " -ktuple=");
            ajStrFromInt(&tmp, ktup);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -window=");
            ajStrFromInt(&tmp, window);
            ajStrAppendS(&cmd, tmp);
            if(nopercent)
                ajStrAppendC(&cmd, " -score=percent");
            else
                ajStrAppendC(&cmd, " -score=absolute");
            ajStrAppendC(&cmd, " -topdiags=");
            ajStrFromInt(&tmp, topdiags);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pairgap=");
            ajStrFromInt(&tmp, gapw);
            ajStrAppendS(&cmd, tmp);
        }
        else
        {
            if(pairwise_matrix)
            {
		if(are_prot)
		    ajStrAppendC(&cmd, " -pwmatrix=");
		else
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		ajStrAppendS(&cmd, ajFileGetPrintnameS(pairwise_matrix));
            }
            else
            {
		if(are_prot)
		{
		    ajStrAppendC(&cmd, " -pwmatrix=");
		    ajStrAppendS(&cmd, pwmstr);
		}
		else
		{
		    ajStrAppendC(&cmd, " -pwdnamatrix=");
		    ajStrAppendS(&cmd, pwdstr);
		}
            }
            ajStrAppendC(&cmd, " -pwgapopen=");
            ajStrFromFloat(&tmp, pw_gapc, 3);
            ajStrAppendS(&cmd, tmp);
            ajStrAppendC(&cmd, " -pwgapext=");
            ajStrFromFloat(&tmp, pw_gapv, 3);
            ajStrAppendS(&cmd, tmp);
        }
    }

    /* Multiple alignments */

    /* using existing tree or generating new tree? */
    if(use_dend)
    {
        ajStrAppendC(&cmd, " -usetree=");
        ajStrAppendS(&cmd, dend_filename);
    }
    else
    {
	/* use tmp file to hold dend file, will read back in later */
	tmp_dendfilename = emma_getUniqueFileName();
        ajStrAppendC(&cmd, " -newtree=");
        ajStrAppendS(&cmd, tmp_dendfilename);
    }

    if(ma_matrix)
    {
	if(are_prot)
	    ajStrAppendC(&cmd, " -matrix=");
	else
	    ajStrAppendC(&cmd, " -pwmatrix=");
	ajStrAppendS(&cmd, ajFileGetPrintnameS(ma_matrix));
    }
    else
    {
	if(are_prot)
	{
	    ajStrAppendC(&cmd, " -matrix=");
	    ajStrAppendS(&cmd, m1str);
	}
	else
	{
	    ajStrAppendC(&cmd, " -dnamatrix=");
	    ajStrAppendS(&cmd, m2str);
	}
    }

    ajStrAppendC(&cmd, " -gapopen=");
    ajStrFromFloat(&tmp, gapc, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapext=");
    ajStrFromFloat(&tmp, gapv, 3);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -gapdist=");
    ajStrFromInt(&tmp, gap_dist);
    ajStrAppendS(&cmd, tmp);
    ajStrAppendC(&cmd, " -hgapresidues=");
    ajStrAppendS(&cmd, hgapres);

    if(!endgaps)
	ajStrAppendC(&cmd, " -endgaps");

    if(norgap)
	ajStrAppendC(&cmd, " -nopgap");

    if(nohgap)
	ajStrAppendC(&cmd, " -nohgap");

    ajStrAppendC(&cmd, " -maxdiv=");
    ajStrFromInt(&tmp, maxdiv);
    ajStrAppendS(&cmd, tmp);


    /*  run clustalw */

/*    ajFmtError("..%s..\n\n", ajStrGetPtr( cmd)); */
    ajDebug("Executing '%S'\n", cmd);

    ajSysExecS(cmd);

    /* produce alignment file only if one was produced */
    if(!only_dend)
    {
	/* read in tmp alignment output file to output through EMBOSS output */

	seqin = ajSeqinNew();
	/*
	**  add the Usa format to the start of the filename to tell EMBOSS
	**  format of file
	*/
	ajStrInsertC(&tmp_aln_outfile, 0, "msf::");
	ajSeqinUsa(&seqin, tmp_aln_outfile);
	seqset = ajSeqsetNew();
	if(ajSeqsetRead(seqset, seqin))
	{
	    ajSeqoutWriteSet(seqout, seqset);


	    ajSeqoutClose(seqout);
	    ajSeqinDel(&seqin);

	    /* remove the Usa from the start of the string */
	    ajStrCutStart(&tmp_aln_outfile, 5);
	}
	else
	    ajFmtError("Problem writing out EMBOSS alignment file\n");
    }


    /* read in new tmp dend file (if produced) to output through EMBOSS */
    if(tmp_dendfilename!=NULL)
    {
	tmp_dendfile = ajFileNewInNameS( tmp_dendfilename);

	if(tmp_dendfile!=NULL){
	while(ajReadlineTrim(tmp_dendfile, &line))
	    ajFmtPrintF(dend_outfile, "%s\n", ajStrGetPtr( line));

	ajFileClose(&tmp_dendfile);
	ajSysFileUnlinkS(tmp_dendfilename);
    }
    }


    ajSysFileUnlinkS(tmpFilename);

    if(!only_dend)
	ajSysFileUnlinkS(tmp_aln_outfile);

    ajStrDel(&pw_matrix);
    ajStrDel(&matrix);
    ajStrDel(&pw_dna_matrix);
    ajStrDel(&dna_matrix);
    ajStrDel(&tmp_dendfilename);
    ajStrDel(&dend_filename);
    ajStrDel(&tmp_aln_outfile);
    ajStrDel(&pwmstr);
    ajStrDel(&pwdstr);
    ajStrDel(&m1str);
    ajStrDel(&m2str);
    ajStrDel(&hgapres);
    ajStrDel(&cmd);
    ajStrDel(&tmp);
    ajStrDel(&tmpFilename);
    ajStrDel(&line);

    ajFileClose(&dend_outfile);
    ajFileClose(&tmp_dendfile);
    ajFileClose(&dend_file);
    ajFileClose(&pairwise_matrix);
    ajFileClose(&ma_matrix);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajSeqoutDel(&fil_file);
    ajSeqinDel(&seqin);

    embExit();

    return 0;
}
Example #2
0
int main(int argc, char **argv)
{
    AjPSeqset seqset = NULL;
    AjPStr    cl     = NULL;
    AjPSeqout seqout = NULL;

    AjBool    full   = ajFalse;

    AjPStr    fn     = NULL;
    AjPStr    stmp   = NULL;
    
    AjPStr    outfname = NULL;
    
    
    embInitPV("echlorop", argc, argv, "CBSTOOLS", VERSION);


    seqset  = ajAcdGetSeqset("sequence");
    outfname= ajAcdGetOutfileName("outfile");
    full    = ajAcdGetBoolean("full");
    
    cl   = ajStrNewS(ajAcdGetpathC("chlorop"));
    fn   = ajStrNew();
    stmp = ajStrNew();
    


    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
	ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);

    if(full)
        ajStrAppendC(&cl," -F");

    ajFmtPrintS(&stmp," %S",fn);
    ajStrAppendS(&cl,stmp);


#if 0
    ajFmtPrint("%S\n",cl);
#endif

#if 1
    ajSysExecOutnameAppendS(cl, outfname);
#endif

    ajSysFileUnlinkS(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);
    ajStrDel(&outfname);
    
    embExit();

    return 0;
}
Example #3
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Example #4
0
int main(int argc, char **argv)
{
    AjPSeqset seqset = NULL;
    AjPStr    cl     = NULL;
    AjPSeqout seqout = NULL;
    float     cutoff = 0.;
    AjBool    best   = ajFalse;
    AjBool    gff    = ajFalse;
    AjBool    two    = ajFalse;
    AjBool    kinase = ajFalse;
    AjPStr    rsd    = NULL;
    AjBool    addseq = ajFalse;
    AjBool    plot   = ajFalse;

    const AjPStr ofn = NULL;
    AjPStr    fn     = NULL;
    AjPStr    stmp   = NULL;
    
    AjPFile outf = NULL;
    
    
    embInitPV("enetphos", argc, argv, "CBSTOOLS", VERSION);


    seqset  = ajAcdGetSeqset("sequence");
    outf    = ajAcdGetOutfile("outfile");
    plot    = ajAcdGetBoolean("plot");
    best    = ajAcdGetBoolean("best");
    gff     = ajAcdGetBoolean("gff");
    two     = ajAcdGetBoolean("two");
    kinase  = ajAcdGetBoolean("kinase");
    addseq  = ajAcdGetBoolean("addseq");
    cutoff  = ajAcdGetFloat("cutoff");
    rsd     = ajAcdGetListSingle("residue");
    
    
    cl   = ajStrNewC("netphos ");
    fn   = ajStrNew();
    stmp = ajStrNew();
    


    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
	ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);


    if(ajStrMatchC(rsd,"serine"))
        ajStrAppendC(&cl," -s");
    else if(ajStrMatchC(rsd,"threonine"))
        ajStrAppendC(&cl," -t");
    else if(ajStrMatchC(rsd,"tyrosine"))
        ajStrAppendC(&cl," -y");
    
    if(plot)
        ajStrAppendC(&cl," -g");

    if(two)
        ajStrAppendC(&cl," -2");
    
    if(best)
        ajStrAppendC(&cl," -b");

    if(gff)
        ajStrAppendC(&cl," -f gff");

    if(kinase)
        ajStrAppendC(&cl," -k");
    
    if(addseq)
        ajStrAppendC(&cl," -S");

    ajFmtPrintS(&stmp," -c %f",cutoff);
    ajStrAppendS(&cl,stmp);

    ajFmtPrintS(&stmp," %S",fn);
    ajStrAppendS(&cl,stmp);

    ofn = ajFileGetNameS(outf);
    ajFmtPrintS(&stmp," > %S",ofn);
    ajStrAppendS(&cl,stmp);
    ajFileClose(&outf);

#if 0
   ajFmtPrint("%S\n",cl);
#endif

#if 1
   if(system(ajStrGetPtr(cl)) == -1)
       ajFatal("Command %S failed",cl);
#endif

    ajSysFileUnlink(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajStrDel(&rsd);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);

    embExit();

    return 0;
}
Example #5
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPSeqset  dataset   = NULL;
    AjPFile    bfile     = NULL;
    AjPFile    plib      = NULL;
    AjPStr     mod       = NULL;
    ajint      nmotifs   = 0;
    AjBool     text      = ajFalse;
    AjPStr     prior     = NULL;
    float      evt       = 0.0;
    ajint      nsites    = 0;
    ajint      minsites  = 0;
    ajint      maxsites  = 0;
    float      wnsites   = 0.0;
    ajint      w         = 0;
    ajint      minw      = 0;
    ajint      maxw      = 0;
    AjBool     nomatrim  = ajFalse;
    ajint      wg        = 0;
    ajint      ws        = 0;
    AjBool     noendgaps = ajFalse;
    AjBool     revcomp   = ajFalse;
    AjBool     pal       = ajFalse;
    AjBool     nostatus  = ajFalse;
    ajint      maxiter   = 0;
    float      distance  = 0.0;
    float      b         = 0.0;
    float      spfuzz    = 0.0;
    AjPStr     spmap     = NULL;
    AjPStr     cons      = NULL;
    ajint      maxsize   = 0;
    ajint      p         = 0;
    ajint      time      = 0;
    AjPStr     sf        = NULL;
    ajint      heapsize  = 64;
    AjBool     xbranch   = ajFalse;
    AjBool     wbranch   = ajFalse;
    ajint      bfactor   = 0;
    AjPFile    outtext   = NULL;
    
    /* Housekeeping variables */
    AjPStr     cmd       = NULL;
    AjPStr     ssname    = NULL;      
    AjPSeqout  outseq    = NULL;   
    AjPStr     tmp       = NULL;
    char       option;



    
    /* ACD file processing */
    embInitPV("ememetext",argc,argv,"MEME",VERSION);
    dataset   = ajAcdGetSeqset("dataset");
    bfile     = ajAcdGetInfile("bfile");
    plib      = ajAcdGetInfile("plibfile");
    mod       = ajAcdGetSelectSingle("mod");
    nmotifs   = ajAcdGetInt("nmotifs");
    text      = ajAcdGetBoolean("text");
    prior     = ajAcdGetSelectSingle("prior");
    evt       = ajAcdGetFloat("evt");
    nsites    = ajAcdGetInt("nsites");
    minsites  = ajAcdGetInt("minsites");
    maxsites  = ajAcdGetInt("maxsites");
    wnsites   = ajAcdGetFloat("wnsites");
    w         = ajAcdGetInt("w");
    minw      = ajAcdGetInt("minw");
    maxw      = ajAcdGetInt("maxw");
    nomatrim  = ajAcdGetBoolean("nomatrim");
    wg        = ajAcdGetInt("wg");
    ws        = ajAcdGetInt("ws");
    noendgaps = ajAcdGetBoolean("noendgaps");
    revcomp   = ajAcdGetBoolean("revcomp");
    pal       = ajAcdGetBoolean("pal");
    nostatus  = ajAcdGetBoolean("nostatus");
    maxiter   = ajAcdGetInt("maxiter");
    distance  = ajAcdGetFloat("distance");
    b         = ajAcdGetFloat("b");
    spfuzz    = ajAcdGetFloat("spfuzz");
    spmap     = ajAcdGetSelectSingle("spmap");
    cons      = ajAcdGetString("cons");
    maxsize   = ajAcdGetInt("maxsize");
    p         = ajAcdGetInt("p");
    time      = ajAcdGetInt("time");
    sf        = ajAcdGetString("sf");
    heapsize  = ajAcdGetInt("heapsize");
    xbranch   = ajAcdGetBoolean("xbranch");
    wbranch   = ajAcdGetBoolean("wbranch");
    bfactor   = ajAcdGetInt("bfactor");    

    outtext   = ajAcdGetOutfile("outtext");
    outseq    = ajAcdGetSeqoutset("outseq");
    
    

    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd      = ajStrNew();
    tmp      = ajStrNew();
    
    /* 2. Re-write dataset to a temporary file in a format (fasta) MEME
    ** can understand.
    ** Can't just pass the name of dataset to MEME as the name provided
    ** might be a USA which MEME would not understand.
    */

    ssname = ajStrNewS(ajFileGetNameS(outseq->File));
    
    ajSeqoutSetFormatC(outseq, "fasta");
    ajSeqoutWriteSet(outseq, dataset);
    ajSeqoutClose(outseq);
    ajSeqoutDel(&outseq);


    /* 3. Build ememe command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. Original MEME options (in order they appear in ACD file)
       iii.Original MEME options (that don't appear in ACD file)
       iv. EMBASSY MEME new qualifiers and parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("meme"));
    ajFmtPrintAppS(&cmd, " %S", ssname);

    if(bfile)
	ajFmtPrintAppS(&cmd, " -bfile %s ", ajFileGetNameC(bfile));

    if(plib)
	ajFmtPrintAppS(&cmd, " -plib %s ", ajFileGetNameC(plib));

    option = ajStrGetCharFirst(mod);
    if(option == 'o')
	ajStrAppendC(&cmd, " -mod oops ");
    else if(option == 'z')
	ajStrAppendC(&cmd, " -mod zoops ");
    else if(option == 'a')
	ajStrAppendC(&cmd, " -mod anr ");

    if(nmotifs != 1)
        ajFmtPrintAppS(&cmd, " -nmotifs %d ", nmotifs);

    if(text)
	ajFmtPrintAppS(&cmd, " -text ");

    ajFmtPrintAppS(&cmd,  " -prior %S ", prior);

    if(evt != -1)
	ajFmtPrintAppS(&cmd, " -evt %f ", evt);

    if(nsites != -1)
	ajFmtPrintAppS(&cmd, " -nsites %d ", nsites);
    else
    {
	if(minsites != -1)
	    ajFmtPrintAppS(&cmd, " -minsites %d ", minsites);
	if(maxsites != -1)
	    ajFmtPrintAppS(&cmd, " -maxsites %d ", maxsites);
    }

    if(wnsites < 0.7999 || wnsites > .8001)
        ajFmtPrintAppS(&cmd, " -wnsites %f ", wnsites);

    if(w != -1)
	ajFmtPrintAppS(&cmd, " -w %d ", w);

    if(minw != 8)
        ajFmtPrintAppS(&cmd, " -minw %d ", minw);

    if(maxw != 50)
        ajFmtPrintAppS(&cmd, " -maxw %d ", maxw);

    if(nomatrim)
	ajFmtPrintAppS(&cmd, " -nomatrim ");


    if(wg != 11)
        ajFmtPrintAppS(&cmd, " -wg %d ", wg);

    if(ws != 1)
        ajFmtPrintAppS(&cmd, " -ws %d ", ws);

    if(noendgaps)
	ajFmtPrintAppS(&cmd, " -noendgaps ");

    if(revcomp)
	ajFmtPrintAppS(&cmd, " -revcomp ");

    if(pal && ajSeqsetIsNuc(dataset))
	ajFmtPrintAppS(&cmd, " -pal ");

    if(nostatus)
	ajFmtPrintAppS(&cmd, " -nostatus ");

    if(maxiter != 50)
        ajFmtPrintAppS(&cmd, " -maxiter %d ", maxiter);

    if(distance < 0.00099 || distance > 0.00101)
        ajFmtPrintAppS(&cmd, " -distance %f ", distance);

    if(b != -1)
	ajFmtPrintAppS(&cmd, " -b %f ", b);

    if(spfuzz != -1)
	ajFmtPrintAppS(&cmd, " -spfuzz %f ", spfuzz);

    
    if(!ajStrMatchC(spmap,"default"))
        ajFmtPrintAppS(&cmd,  " -spmap %S ", spmap);

    if(MAJSTRGETLEN(cons))
	ajFmtPrintAppS(&cmd, "-cons %S", cons);

    if(maxsize != -1)
        ajFmtPrintAppS(&cmd, " -maxsize %d ", maxsize);

    if(p > 0)
	ajFmtPrintAppS(&cmd, " -p %d ", p);

    if(time > 0)
	ajFmtPrintAppS(&cmd, " -time %d ", time);

    if(MAJSTRGETLEN(sf))
	ajFmtPrintAppS(&cmd, " -sf %S", sf);

    if(heapsize != 64)
 	ajFmtPrintAppS(&cmd, " -heapsize %d ", heapsize);

    if(xbranch)
        ajFmtPrintAppS(&cmd, " -x_branch");

    if(wbranch)
        ajFmtPrintAppS(&cmd, " -w_branch");
    
    if(bfactor != 3)
 	ajFmtPrintAppS(&cmd, " -bfactor %d ", bfactor);

    if(ajSeqsetIsProt(dataset))
	ajFmtPrintAppS(&cmd, "-protein ");
    else
	ajFmtPrintAppS(&cmd, "-dna ");

    ajFmtPrintAppS(&cmd, " -text");

    ajFmtPrintAppS(&cmd, " > %S ", ajFileGetNameS(outtext));


    /* 4. Close files from ACD before calling meme */	
    ajFileClose(&bfile);	
    ajFileClose(&plib);


    /* 5. Call meme */
    /* ajFmtPrint("\n%S\n", cmd); */
    system(ajStrGetPtr(cmd));    


    /* 6. Exit cleanly */

    ajSeqsetDel(&dataset);
    ajStrDel(&cons);
    ajStrDel(&sf);
    ajStrDel(&mod);
    ajStrDel(&prior);
    ajStrDel(&spmap);

    ajStrDel(&cmd);
    ajStrDel(&ssname);
    ajStrDel(&tmp);
    
    ajFileClose(&bfile);
    ajFileClose(&plib);
    ajFileClose(&outtext);
    ajSeqoutDel(&outseq);
    
    embExit();

    return 0;
}
Example #6
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPFile    hmmfile = NULL;     
    AjPSeqset  seqfile = NULL;     
    AjPFile     mapali = NULL;     
    AjPFile    withali = NULL;     
    AjPAlign         o = NULL;     
    AjBool           m = ajFalse;
    AjBool           q = ajFalse;

    /* Housekeeping variables */
    AjPStr        cmd = NULL;
    AjPStr        fmt = NULL;
    AjBool      fmtok = ajFalse;
    AjPStr        rnd = NULL;      
    AjPSeqout    rndo = NULL;      
    


    /* ACD file processing */
    embInitPV("ehmmalign",argc,argv,"HMMERNEW",VERSION);

    hmmfile = ajAcdGetInfile("hmmfile");
    seqfile = ajAcdGetSeqset("seqfile");
    mapali  = ajAcdGetInfile("mapali");
    withali = ajAcdGetInfile("withali");
    o       = ajAcdGetAlign("o");
    m       = ajAcdGetBoolean("m");
    q       = ajAcdGetBoolean("q");





    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd  = ajStrNew();
    fmt  = ajStrNew();
    rnd  = ajStrNew();



    
    /* 2. Re-write seqfile to a temporary file in a format (fasta) HMMER can understand.
       We cannot just pass the name of seqfile to HMMER as the name provided might be a 
       USA which HMMER would not understand. */
    ajFilenameSetTempname(&rnd);
    rndo = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(rndo, rnd))
	ajFatal("Terminal ajSeqFileNewOut failure. Email EMBOSS helpdesk!\n");
    ajSeqoutSetFormatC(rndo, "fasta");
    ajSeqoutWriteSet(rndo, seqfile);
    ajSeqoutClose(rndo);
    ajSeqoutDel(&rndo);


    /* 3. Build hmmalign command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. HMMER 'options' (in order they appear in ACD file)
       iii.HMMER 'options' (that don't appear in ACD file)
       iv. HMMER & new parameters.
       */
    ajFmtPrintS(&cmd, "%S ", ajAcdGetpathC("hmmalign"));
    if(mapali)
	ajFmtPrintAppS(&cmd, " --mapali %s ", ajFileGetNameC(mapali));
    if(withali)
	ajFmtPrintAppS(&cmd, " --withali %s ", ajFileGetNameC(withali));
    if(m)
	ajStrAppendC(&cmd, " -m ");
    if(q)
	ajStrAppendC(&cmd, " -q ");


    /* Ensure output alignment is in user-specified format. */
    fmtok=ajTrue;
    ajStrAssignS(&fmt, ajAlignGetFormat(o));
    /* fasta and a2m are identical formats. */
    if(ajStrMatchC(fmt, "fasta"))
	ajStrAssignC(&fmt, "A2M");
    else if(ajStrMatchC(fmt, "a2m"))
	ajStrAssignC(&fmt, "A2M");
    else if(ajStrMatchC(fmt, "msf"))
	ajStrAssignC(&fmt, "MSF");
    else if(ajStrMatchC(fmt, "phylip"))
	ajStrAssignC(&fmt, "PHYLIP");
    /* hmmer also supports stockholm, SELEX & Clustal output, EMBOSS does not.
       EMBOSS supports unknown/multiple/simple and srs output, hmmer does not. */ 
    else
	fmtok = ajFalse;

    if(!fmtok)
    {
	/* This could be replaced with code to reformat the file. */
	ajWarn("Specified output alignment format ('o' ACD option) is "
	       "not understood by HMMER.  Using stockholm format instead.");
	ajStrAssignC(&fmt, "Stockholm");
    }
       

    /* rnd is the name of the rewritten seqfile.  MUST specify FASTA format explicitly. */
    ajFmtPrintAppS(&cmd, " --informat FASTA --outformat %S  -o %s %s %S", 
		   fmt,
		   ajAlignGetFilename(o),
		   ajFileGetNameC(hmmfile),
		   rnd);
            
    /* 4. Close ACD files */
    ajFileClose(&hmmfile);    
    ajSeqsetDel(&seqfile);
    ajFileClose(&mapali);
    ajFileClose(&withali);
    ajAlignClose(o);
    ajAlignDel(&o);

    
    /* 5. Call hmmalign */
    ajFmtPrint("\n%S\n\n", cmd);
    ajSysExecS(cmd);


    /* 6. Exit cleanly */
    ajSysFileUnlinkS(rnd); 
    
    ajStrDel(&cmd);
    ajStrDel(&fmt);
    ajStrDel(&rnd);
    embExit();

    return 0;
}
Example #7
0
int main(int argc, char **argv)
{
    AjPSeqset seqset = NULL;
    AjPStr    cl     = NULL;
    AjPSeqout seqout = NULL;
    AjBool    pc     = ajFalse;
    AjBool    sigp   = ajFalse;
    AjBool    plot   = ajFalse;

    const AjPStr ofn = NULL;
    AjPStr    fn     = NULL;
    AjPStr    stmp   = NULL;

    AjPFile outf = NULL;


    embInitPV("eprop", argc, argv, "CBSTOOLS", VERSION);


    seqset  = ajAcdGetSeqset("sequence");
    outf    = ajAcdGetOutfile("outfile");
    plot    = ajAcdGetBoolean("plot");
    pc      = ajAcdGetBoolean("pcprediction");
    sigp    = ajAcdGetBoolean("signalp");

    cl   = ajStrNewC("prop ");
    fn   = ajStrNew();
    stmp = ajStrNew();



    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
        ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);


    if(plot)
        ajStrAppendC(&cl," -g");

    if(pc)
        ajStrAppendC(&cl," -p");

    if(sigp)
        ajStrAppendC(&cl," -s");

    ajFmtPrintS(&stmp," %S",fn);
    ajStrAppendS(&cl,stmp);

    ofn = ajFileGetNameS(outf);
    ajFmtPrintS(&stmp," > %S",ofn);
    ajStrAppendS(&cl,stmp);
    ajFileClose(&outf);

#if 0
    ajFmtPrint("%S\n",cl);
#endif

#if 1
    if(system(ajStrGetPtr(cl)) == -1)
        ajFatal("Command %S failed",cl);
#endif

    ajSysFileUnlink(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);

    embExit();

    return 0;
}
Example #8
0
int main(int argc, char **argv)
{
    AjPStr    cl     = NULL;
    AjPSeqset seqset = NULL;
    AjPSeqout seqout = NULL;
    
    AjPStr    stmp   = NULL;
    AjPStr    email  = NULL;
    AjPStr    fmt    = NULL;
    AjPStr    trtab  = NULL;
    const AjPStr ofn = NULL;
    
    AjPStr    *applist = NULL;

    
    AjBool crc = ajFalse;
    AjBool alt = ajFalse;

    AjBool iprlook = ajFalse;
    AjBool goterms = ajFalse;
    
    ajint trlen = 0;
    
    
    ajuint i    = 0;
    ajuint lcnt = 0;
    
    
    AjPStr fn = NULL;
    AjPFile outf = NULL;
    
    
    embInitPV("eiprscan", argc, argv, "IPRSCAN", VERSION);


    seqset  = ajAcdGetSeqset("sequence");
    email   = ajAcdGetString("email");
    crc     = ajAcdGetBoolean("crc");
    applist = ajAcdGetList("appl");
    fmt     = ajAcdGetListSingle("format");
    trtab   = ajAcdGetListSingle("trtable");
    trlen   = ajAcdGetInt("trlen");
    alt     = ajAcdGetBoolean("altjobs");
    iprlook = ajAcdGetBoolean("iprlookup");
    goterms = ajAcdGetBoolean("goterms");
    outf    = ajAcdGetOutfile("outfile");
    
    stmp = ajStrNew();
    cl   = ajStrNewC("iprscan -cli");
    fn   = ajStrNew();
    


    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
	ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);

    ajFmtPrintS(&stmp," -i %S",fn);
    ajStrAppendS(&cl,stmp);

    if(!ajSeqsetIsProt(seqset))
        ajStrAppendC(&cl," -seqtype n");

    if(!crc)
        ajStrAppendC(&cl," -nocrc");

    if(ajStrGetLen(email))
    {
        ajFmtPrintS(&stmp," -email %S",email);
        ajStrAppendS(&cl,stmp);
    }
    

    i = 0;
    lcnt = 0;
    
    while(applist[i])
    {
        ++lcnt;
        ++i;
    }
    

    i = 0;
    while(applist[i])
    {
        if(ajStrMatchC(applist[i],"all"))
        {
            if(lcnt != 1)
                ajFatal("Cannot specify 'all' with multiple "
                        "applications");
            ++i;
            continue;
        }
        
        ajFmtPrintS(&stmp," -appl %S",applist[i]);
        ajStrAppendS(&cl,stmp);
        ++i;
    }
    
    ajFmtPrintS(&stmp," -format %S",fmt);
    ajStrAppendS(&cl,stmp);



    ajFmtPrintS(&stmp," -trtable %S",trtab);
    ajStrAppendS(&cl,stmp);

    ajFmtPrintS(&stmp," -trlen %d",trlen);
    ajStrAppendS(&cl,stmp);

    if(alt)
        ajStrAppendC(&cl," -altjobs");

    if(iprlook)
        ajStrAppendC(&cl," -iprlookup");

    if(goterms)
        ajStrAppendC(&cl," -goterms");


    ofn = ajFileGetNameS(outf);
    ajFmtPrintS(&stmp," -o %S",ofn);
    ajFileClose(&outf);
    ajStrAppendS(&cl,stmp);

    
#if 0
   ajFmtPrint("%S\n",cl);
#endif

#if 1
    system(ajStrGetPtr(cl));
#endif


    ajSysFileUnlink(fn);
    
    
    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajStrDel(&email);
    ajStrDel(&fmt);
    ajStrDel(&trtab);
    ajStrDelarray(&applist);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);
    
    embExit();

    return 0;
}
Example #9
0
int main(int argc, char **argv)
{
    AjPSeqset seqset  = NULL;
    AjPStr    cl      = NULL;
    AjPSeqout seqout  = NULL;
    float     thresh  = 0.;
    AjBool    netphos = ajFalse;
    AjPStr    format  = NULL;
    AjBool    plot    = ajFalse;

    AjPStr    fn     = NULL;
    AjPStr    stmp   = NULL;
    
    AjPStr  outfname = NULL;
    
    
    embInitPV("eyinoyang", argc, argv, "CBSTOOLS",VERSION);


    seqset  = ajAcdGetSeqset("sequence");
    outfname= ajAcdGetOutfileName("outfile");
    plot    = ajAcdGetBoolean("plot");
    netphos = ajAcdGetBoolean("netphos");
    thresh  = ajAcdGetFloat("threshold");
    format  = ajAcdGetListSingle("format");


    cl   = ajStrNewS(ajAcdGetpathC("yinOyang"));
    fn   = ajStrNew();
    stmp = ajStrNew();



    ajFilenameSetTempname(&fn);
    seqout = ajSeqoutNew();
    if(!ajSeqoutOpenFilename(seqout, fn))
	ajFatal("Cannot open temporary file %S",fn);
    ajSeqoutSetFormatC(seqout, "fasta");
    ajSeqoutWriteSet(seqout,seqset);
    ajSeqoutClose(seqout);


    if(ajStrMatchC(format,"short"))
        ajStrAppendC(&cl," -f s");
    else if(ajStrMatchC(format,"long"))
        ajStrAppendC(&cl," -f l");
    
    if(plot)
        ajStrAppendC(&cl," -g");

    if(netphos)
        ajStrAppendC(&cl," -y");
    
    if(netphos)
    {
        ajFmtPrintS(&stmp," -t %f",thresh);
        ajStrAppendS(&cl,stmp);
    }

    ajFmtPrintS(&stmp," %S",fn);
    ajStrAppendS(&cl,stmp);

#if 0
    ajFmtPrint("%`S\n",cl);
#endif

#if 1
    ajSysExecOutnameAppendS(cl, outfname);
#endif

    ajSysFileUnlinkS(fn);

    ajStrDel(&cl);
    ajStrDel(&fn);
    ajStrDel(&stmp);
    ajStrDel(&format);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&seqset);
    ajStrDel(&outfname);

    embExit();

    return 0;
}