Пример #1
0
int main(int argc, char **argv)
{
    /* ACD data item variables */
    AjPSeqset  dataset   = NULL;
    AjPFile    bfile     = NULL;
    AjPFile    plib      = NULL;
    AjPStr     mod       = NULL;
    ajint      nmotifs   = 0;
    AjBool     text      = ajFalse;
    AjPStr     prior     = NULL;
    float      evt       = 0.0;
    ajint      nsites    = 0;
    ajint      minsites  = 0;
    ajint      maxsites  = 0;
    float      wnsites   = 0.0;
    ajint      w         = 0;
    ajint      minw      = 0;
    ajint      maxw      = 0;
    AjBool     nomatrim  = ajFalse;
    ajint      wg        = 0;
    ajint      ws        = 0;
    AjBool     noendgaps = ajFalse;
    AjBool     revcomp   = ajFalse;
    AjBool     pal       = ajFalse;
    AjBool     nostatus  = ajFalse;
    ajint      maxiter   = 0;
    float      distance  = 0.0;
    float      b         = 0.0;
    float      spfuzz    = 0.0;
    AjPStr     spmap     = NULL;
    AjPStr     cons      = NULL;
    ajint      maxsize   = 0;
    ajint      p         = 0;
    ajint      time      = 0;
    AjPStr     sf        = NULL;
    ajint      heapsize  = 64;
    AjBool     xbranch   = ajFalse;
    AjBool     wbranch   = ajFalse;
    ajint      bfactor   = 0;
    AjPFile    outtext   = NULL;
    
    /* Housekeeping variables */
    AjPStr     cmd       = NULL;
    AjPStr     ssname    = NULL;      
    AjPSeqout  outseq    = NULL;   
    AjPStr     tmp       = NULL;
    char       option;



    
    /* ACD file processing */
    embInitPV("ememetext",argc,argv,"MEME",VERSION);
    dataset   = ajAcdGetSeqset("dataset");
    bfile     = ajAcdGetInfile("bfile");
    plib      = ajAcdGetInfile("plibfile");
    mod       = ajAcdGetSelectSingle("mod");
    nmotifs   = ajAcdGetInt("nmotifs");
    text      = ajAcdGetBoolean("text");
    prior     = ajAcdGetSelectSingle("prior");
    evt       = ajAcdGetFloat("evt");
    nsites    = ajAcdGetInt("nsites");
    minsites  = ajAcdGetInt("minsites");
    maxsites  = ajAcdGetInt("maxsites");
    wnsites   = ajAcdGetFloat("wnsites");
    w         = ajAcdGetInt("w");
    minw      = ajAcdGetInt("minw");
    maxw      = ajAcdGetInt("maxw");
    nomatrim  = ajAcdGetBoolean("nomatrim");
    wg        = ajAcdGetInt("wg");
    ws        = ajAcdGetInt("ws");
    noendgaps = ajAcdGetBoolean("noendgaps");
    revcomp   = ajAcdGetBoolean("revcomp");
    pal       = ajAcdGetBoolean("pal");
    nostatus  = ajAcdGetBoolean("nostatus");
    maxiter   = ajAcdGetInt("maxiter");
    distance  = ajAcdGetFloat("distance");
    b         = ajAcdGetFloat("b");
    spfuzz    = ajAcdGetFloat("spfuzz");
    spmap     = ajAcdGetSelectSingle("spmap");
    cons      = ajAcdGetString("cons");
    maxsize   = ajAcdGetInt("maxsize");
    p         = ajAcdGetInt("p");
    time      = ajAcdGetInt("time");
    sf        = ajAcdGetString("sf");
    heapsize  = ajAcdGetInt("heapsize");
    xbranch   = ajAcdGetBoolean("xbranch");
    wbranch   = ajAcdGetBoolean("wbranch");
    bfactor   = ajAcdGetInt("bfactor");    

    outtext   = ajAcdGetOutfile("outtext");
    outseq    = ajAcdGetSeqoutset("outseq");
    
    

    /* MAIN APPLICATION CODE */
    /* 1. Housekeeping */
    cmd      = ajStrNew();
    tmp      = ajStrNew();
    
    /* 2. Re-write dataset to a temporary file in a format (fasta) MEME
    ** can understand.
    ** Can't just pass the name of dataset to MEME as the name provided
    ** might be a USA which MEME would not understand.
    */

    ssname = ajStrNewS(ajFileGetNameS(outseq->File));
    
    ajSeqoutSetFormatC(outseq, "fasta");
    ajSeqoutWriteSet(outseq, dataset);
    ajSeqoutClose(outseq);
    ajSeqoutDel(&outseq);


    /* 3. Build ememe command line */
    /* Command line is built in this order: 
       i.  Application name.
       ii. Original MEME options (in order they appear in ACD file)
       iii.Original MEME options (that don't appear in ACD file)
       iv. EMBASSY MEME new qualifiers and parameters.
       */
    ajStrAssignS(&cmd, ajAcdGetpathC("meme"));
    ajFmtPrintAppS(&cmd, " %S", ssname);

    if(bfile)
	ajFmtPrintAppS(&cmd, " -bfile %s ", ajFileGetNameC(bfile));

    if(plib)
	ajFmtPrintAppS(&cmd, " -plib %s ", ajFileGetNameC(plib));

    option = ajStrGetCharFirst(mod);
    if(option == 'o')
	ajStrAppendC(&cmd, " -mod oops ");
    else if(option == 'z')
	ajStrAppendC(&cmd, " -mod zoops ");
    else if(option == 'a')
	ajStrAppendC(&cmd, " -mod anr ");

    if(nmotifs != 1)
        ajFmtPrintAppS(&cmd, " -nmotifs %d ", nmotifs);

    if(text)
	ajFmtPrintAppS(&cmd, " -text ");

    ajFmtPrintAppS(&cmd,  " -prior %S ", prior);

    if(evt != -1)
	ajFmtPrintAppS(&cmd, " -evt %f ", evt);

    if(nsites != -1)
	ajFmtPrintAppS(&cmd, " -nsites %d ", nsites);
    else
    {
	if(minsites != -1)
	    ajFmtPrintAppS(&cmd, " -minsites %d ", minsites);
	if(maxsites != -1)
	    ajFmtPrintAppS(&cmd, " -maxsites %d ", maxsites);
    }

    if(wnsites < 0.7999 || wnsites > .8001)
        ajFmtPrintAppS(&cmd, " -wnsites %f ", wnsites);

    if(w != -1)
	ajFmtPrintAppS(&cmd, " -w %d ", w);

    if(minw != 8)
        ajFmtPrintAppS(&cmd, " -minw %d ", minw);

    if(maxw != 50)
        ajFmtPrintAppS(&cmd, " -maxw %d ", maxw);

    if(nomatrim)
	ajFmtPrintAppS(&cmd, " -nomatrim ");


    if(wg != 11)
        ajFmtPrintAppS(&cmd, " -wg %d ", wg);

    if(ws != 1)
        ajFmtPrintAppS(&cmd, " -ws %d ", ws);

    if(noendgaps)
	ajFmtPrintAppS(&cmd, " -noendgaps ");

    if(revcomp)
	ajFmtPrintAppS(&cmd, " -revcomp ");

    if(pal && ajSeqsetIsNuc(dataset))
	ajFmtPrintAppS(&cmd, " -pal ");

    if(nostatus)
	ajFmtPrintAppS(&cmd, " -nostatus ");

    if(maxiter != 50)
        ajFmtPrintAppS(&cmd, " -maxiter %d ", maxiter);

    if(distance < 0.00099 || distance > 0.00101)
        ajFmtPrintAppS(&cmd, " -distance %f ", distance);

    if(b != -1)
	ajFmtPrintAppS(&cmd, " -b %f ", b);

    if(spfuzz != -1)
	ajFmtPrintAppS(&cmd, " -spfuzz %f ", spfuzz);

    
    if(!ajStrMatchC(spmap,"default"))
        ajFmtPrintAppS(&cmd,  " -spmap %S ", spmap);

    if(MAJSTRGETLEN(cons))
	ajFmtPrintAppS(&cmd, "-cons %S", cons);

    if(maxsize != -1)
        ajFmtPrintAppS(&cmd, " -maxsize %d ", maxsize);

    if(p > 0)
	ajFmtPrintAppS(&cmd, " -p %d ", p);

    if(time > 0)
	ajFmtPrintAppS(&cmd, " -time %d ", time);

    if(MAJSTRGETLEN(sf))
	ajFmtPrintAppS(&cmd, " -sf %S", sf);

    if(heapsize != 64)
 	ajFmtPrintAppS(&cmd, " -heapsize %d ", heapsize);

    if(xbranch)
        ajFmtPrintAppS(&cmd, " -x_branch");

    if(wbranch)
        ajFmtPrintAppS(&cmd, " -w_branch");
    
    if(bfactor != 3)
 	ajFmtPrintAppS(&cmd, " -bfactor %d ", bfactor);

    if(ajSeqsetIsProt(dataset))
	ajFmtPrintAppS(&cmd, "-protein ");
    else
	ajFmtPrintAppS(&cmd, "-dna ");

    ajFmtPrintAppS(&cmd, " -text");

    ajFmtPrintAppS(&cmd, " > %S ", ajFileGetNameS(outtext));


    /* 4. Close files from ACD before calling meme */	
    ajFileClose(&bfile);	
    ajFileClose(&plib);


    /* 5. Call meme */
    /* ajFmtPrint("\n%S\n", cmd); */
    system(ajStrGetPtr(cmd));    


    /* 6. Exit cleanly */

    ajSeqsetDel(&dataset);
    ajStrDel(&cons);
    ajStrDel(&sf);
    ajStrDel(&mod);
    ajStrDel(&prior);
    ajStrDel(&spmap);

    ajStrDel(&cmd);
    ajStrDel(&ssname);
    ajStrDel(&tmp);
    
    ajFileClose(&bfile);
    ajFileClose(&plib);
    ajFileClose(&outtext);
    ajSeqoutDel(&outseq);
    
    embExit();

    return 0;
}
Пример #2
0
void embConsCalc(const AjPSeqset seqset,const AjPMatrix cmpmatrix,
		 ajint nseqs, ajint mlen,float fplural,float setcase,
		 ajint identity, AjBool gaps, AjPStr *cons)
{
    ajint i;
    ajint j;
    ajint k;
    ajint **matrix;
    ajint m1 = 0;
    ajint m2 = 0;
    ajint highindex;
    ajint matsize;
    ajint matchingmaxindex;
    ajint identicalmaxindex;

    float max;
    float contri = 0;
    float contrj = 0;
    float *identical;
    float *matching;

    AjPSeqCvt cvt  = 0;
    AjPFloat score = NULL;
    const char **seqcharptr;
    char res;
    char nocon = '-';
    void *freeptr;

    matrix  = ajMatrixGetMatrix(cmpmatrix);
    cvt     = ajMatrixGetCvt(cmpmatrix);	/* return conversion table */
    matsize = ajMatrixGetSize(cmpmatrix);

    AJCNEW(seqcharptr,nseqs);
    AJCNEW(identical,matsize);
    AJCNEW(matching,matsize);

    score = ajFloatNew();

    if(ajSeqsetIsNuc(seqset))        /* set non-consensus character */
	nocon = 'N';
    else if ( ajSeqsetIsProt(seqset))
	nocon = 'X';

    for(i=0;i<nseqs;i++)		/* get sequence as string */
	seqcharptr[i] =  ajSeqsetGetseqSeqC(seqset, i);

    for(k=0; k< mlen; k++)
    {
	res = nocon;

	for(i=0;i<matsize;i++)	      /* reset id's and +ve matches */
	{
	    identical[i] = 0.0;
	    matching[i] = 0.0;
	}

	for(i=0;i<nseqs;i++)
	    ajFloatPut(&score,i,0.);

	for(i=0;i<nseqs;i++)	      /* generate score for columns */
	{
	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]);

	    if(m1 || gaps)
		identical[m1] += ajSeqsetGetseqWeight(seqset,i);

	    for(j=i+1;j<nseqs;j++)
	    {
		m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[j][k]);

		if(m1 && m2)
		{
		    contri = (float)matrix[m1][m2]*
                             ajSeqsetGetseqWeight(seqset,j)
			+ajFloatGet(score,i);
		    contrj = (float)matrix[m1][m2]*
                             ajSeqsetGetseqWeight(seqset,i)
			+ajFloatGet(score,j);

		    ajFloatPut(&score,i,contri);
		    ajFloatPut(&score,j,contrj);
		}
	    }
	}

	highindex = -1;
	max  = -(float)INT_MAX;

	for(i=0;i<nseqs;i++)
	    if( ajFloatGet(score,i) > max ||
	       (ajFloatGet(score,i) == max &&
		seqcharptr[highindex][k] == '-') )      
	    {
		highindex = i;
		max       = ajFloatGet(score,i);
	    }

	for(i=0;i<nseqs;i++)	  /* find +ve matches in the column */
	{
	    m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]);

	    if(!matching[m1])
		for(j=0;j<nseqs;j++)
		{
/*
//		    if( i != j)
//		    {
//			m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);
//			if(m1 && m2 && matrix[m1][m2] > 0)
//			    matching[m1] += ajSeqsetGetseqWeight(seqset, j);
//		    }
*/
		    m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]);

		    if(m1 && m2 && matrix[m1][m2] > 0)
                        matching[m1] += ajSeqsetGetseqWeight(seqset, j);

		    if(gaps && !m1 && !m2)
                        matching[m1] += ajSeqsetGetseqWeight(seqset, j);
		}
	}


	matchingmaxindex  = 0;	  /* get max matching and identical */
	identicalmaxindex = 0;

	for(i=0;i<nseqs;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]);

	    if(identical[m1] > identical[identicalmaxindex])
		identicalmaxindex = m1;
	}

	for(i=0;i<nseqs;i++)
	{
	    m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]);

	    if(matching[m1] > matching[matchingmaxindex])
		matchingmaxindex = m1;
	    else if(matching[m1] ==  matching[matchingmaxindex])
		if(identical[m1] > identical[matchingmaxindex])
		    matchingmaxindex = m1;
	}

	/* plurality check */
        m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][k]);

/*	if(matching[m1] >= fplural
	   && seqcharptr[highindex][k] != '-')
	    res = seqcharptr[highindex][k];*/

	if(matching[m1] >= fplural)
	    res = seqcharptr[highindex][k];

	if(matching[m1]<= setcase)
	    res = tolower((int)res);

	if(identity)			/* if just looking for id's */
	{
	    j = 0;

	    for(i=0;i<nseqs;i++)
		if(matchingmaxindex == ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]))
		    j++;

	    if(j<identity)
		res = nocon;
	}

	ajStrAppendK(cons,res);
    }

    freeptr = (void *) seqcharptr;
    AJFREE(freeptr);
    AJFREE(matching);
    AJFREE(identical);
    ajFloatDel(&score);

    return;
}