int main(int argc, char **argv) { /* ACD data item variables */ AjPSeqset dataset = NULL; AjPFile bfile = NULL; AjPFile plib = NULL; AjPStr mod = NULL; ajint nmotifs = 0; AjBool text = ajFalse; AjPStr prior = NULL; float evt = 0.0; ajint nsites = 0; ajint minsites = 0; ajint maxsites = 0; float wnsites = 0.0; ajint w = 0; ajint minw = 0; ajint maxw = 0; AjBool nomatrim = ajFalse; ajint wg = 0; ajint ws = 0; AjBool noendgaps = ajFalse; AjBool revcomp = ajFalse; AjBool pal = ajFalse; AjBool nostatus = ajFalse; ajint maxiter = 0; float distance = 0.0; float b = 0.0; float spfuzz = 0.0; AjPStr spmap = NULL; AjPStr cons = NULL; ajint maxsize = 0; ajint p = 0; ajint time = 0; AjPStr sf = NULL; ajint heapsize = 64; AjBool xbranch = ajFalse; AjBool wbranch = ajFalse; ajint bfactor = 0; AjPFile outtext = NULL; /* Housekeeping variables */ AjPStr cmd = NULL; AjPStr ssname = NULL; AjPSeqout outseq = NULL; AjPStr tmp = NULL; char option; /* ACD file processing */ embInitPV("ememetext",argc,argv,"MEME",VERSION); dataset = ajAcdGetSeqset("dataset"); bfile = ajAcdGetInfile("bfile"); plib = ajAcdGetInfile("plibfile"); mod = ajAcdGetSelectSingle("mod"); nmotifs = ajAcdGetInt("nmotifs"); text = ajAcdGetBoolean("text"); prior = ajAcdGetSelectSingle("prior"); evt = ajAcdGetFloat("evt"); nsites = ajAcdGetInt("nsites"); minsites = ajAcdGetInt("minsites"); maxsites = ajAcdGetInt("maxsites"); wnsites = ajAcdGetFloat("wnsites"); w = ajAcdGetInt("w"); minw = ajAcdGetInt("minw"); maxw = ajAcdGetInt("maxw"); nomatrim = ajAcdGetBoolean("nomatrim"); wg = ajAcdGetInt("wg"); ws = ajAcdGetInt("ws"); noendgaps = ajAcdGetBoolean("noendgaps"); revcomp = ajAcdGetBoolean("revcomp"); pal = ajAcdGetBoolean("pal"); nostatus = ajAcdGetBoolean("nostatus"); maxiter = ajAcdGetInt("maxiter"); distance = ajAcdGetFloat("distance"); b = ajAcdGetFloat("b"); spfuzz = ajAcdGetFloat("spfuzz"); spmap = ajAcdGetSelectSingle("spmap"); cons = ajAcdGetString("cons"); maxsize = ajAcdGetInt("maxsize"); p = ajAcdGetInt("p"); time = ajAcdGetInt("time"); sf = ajAcdGetString("sf"); heapsize = ajAcdGetInt("heapsize"); xbranch = ajAcdGetBoolean("xbranch"); wbranch = ajAcdGetBoolean("wbranch"); bfactor = ajAcdGetInt("bfactor"); outtext = ajAcdGetOutfile("outtext"); outseq = ajAcdGetSeqoutset("outseq"); /* MAIN APPLICATION CODE */ /* 1. Housekeeping */ cmd = ajStrNew(); tmp = ajStrNew(); /* 2. Re-write dataset to a temporary file in a format (fasta) MEME ** can understand. ** Can't just pass the name of dataset to MEME as the name provided ** might be a USA which MEME would not understand. */ ssname = ajStrNewS(ajFileGetNameS(outseq->File)); ajSeqoutSetFormatC(outseq, "fasta"); ajSeqoutWriteSet(outseq, dataset); ajSeqoutClose(outseq); ajSeqoutDel(&outseq); /* 3. Build ememe command line */ /* Command line is built in this order: i. Application name. ii. Original MEME options (in order they appear in ACD file) iii.Original MEME options (that don't appear in ACD file) iv. EMBASSY MEME new qualifiers and parameters. */ ajStrAssignS(&cmd, ajAcdGetpathC("meme")); ajFmtPrintAppS(&cmd, " %S", ssname); if(bfile) ajFmtPrintAppS(&cmd, " -bfile %s ", ajFileGetNameC(bfile)); if(plib) ajFmtPrintAppS(&cmd, " -plib %s ", ajFileGetNameC(plib)); option = ajStrGetCharFirst(mod); if(option == 'o') ajStrAppendC(&cmd, " -mod oops "); else if(option == 'z') ajStrAppendC(&cmd, " -mod zoops "); else if(option == 'a') ajStrAppendC(&cmd, " -mod anr "); if(nmotifs != 1) ajFmtPrintAppS(&cmd, " -nmotifs %d ", nmotifs); if(text) ajFmtPrintAppS(&cmd, " -text "); ajFmtPrintAppS(&cmd, " -prior %S ", prior); if(evt != -1) ajFmtPrintAppS(&cmd, " -evt %f ", evt); if(nsites != -1) ajFmtPrintAppS(&cmd, " -nsites %d ", nsites); else { if(minsites != -1) ajFmtPrintAppS(&cmd, " -minsites %d ", minsites); if(maxsites != -1) ajFmtPrintAppS(&cmd, " -maxsites %d ", maxsites); } if(wnsites < 0.7999 || wnsites > .8001) ajFmtPrintAppS(&cmd, " -wnsites %f ", wnsites); if(w != -1) ajFmtPrintAppS(&cmd, " -w %d ", w); if(minw != 8) ajFmtPrintAppS(&cmd, " -minw %d ", minw); if(maxw != 50) ajFmtPrintAppS(&cmd, " -maxw %d ", maxw); if(nomatrim) ajFmtPrintAppS(&cmd, " -nomatrim "); if(wg != 11) ajFmtPrintAppS(&cmd, " -wg %d ", wg); if(ws != 1) ajFmtPrintAppS(&cmd, " -ws %d ", ws); if(noendgaps) ajFmtPrintAppS(&cmd, " -noendgaps "); if(revcomp) ajFmtPrintAppS(&cmd, " -revcomp "); if(pal && ajSeqsetIsNuc(dataset)) ajFmtPrintAppS(&cmd, " -pal "); if(nostatus) ajFmtPrintAppS(&cmd, " -nostatus "); if(maxiter != 50) ajFmtPrintAppS(&cmd, " -maxiter %d ", maxiter); if(distance < 0.00099 || distance > 0.00101) ajFmtPrintAppS(&cmd, " -distance %f ", distance); if(b != -1) ajFmtPrintAppS(&cmd, " -b %f ", b); if(spfuzz != -1) ajFmtPrintAppS(&cmd, " -spfuzz %f ", spfuzz); if(!ajStrMatchC(spmap,"default")) ajFmtPrintAppS(&cmd, " -spmap %S ", spmap); if(MAJSTRGETLEN(cons)) ajFmtPrintAppS(&cmd, "-cons %S", cons); if(maxsize != -1) ajFmtPrintAppS(&cmd, " -maxsize %d ", maxsize); if(p > 0) ajFmtPrintAppS(&cmd, " -p %d ", p); if(time > 0) ajFmtPrintAppS(&cmd, " -time %d ", time); if(MAJSTRGETLEN(sf)) ajFmtPrintAppS(&cmd, " -sf %S", sf); if(heapsize != 64) ajFmtPrintAppS(&cmd, " -heapsize %d ", heapsize); if(xbranch) ajFmtPrintAppS(&cmd, " -x_branch"); if(wbranch) ajFmtPrintAppS(&cmd, " -w_branch"); if(bfactor != 3) ajFmtPrintAppS(&cmd, " -bfactor %d ", bfactor); if(ajSeqsetIsProt(dataset)) ajFmtPrintAppS(&cmd, "-protein "); else ajFmtPrintAppS(&cmd, "-dna "); ajFmtPrintAppS(&cmd, " -text"); ajFmtPrintAppS(&cmd, " > %S ", ajFileGetNameS(outtext)); /* 4. Close files from ACD before calling meme */ ajFileClose(&bfile); ajFileClose(&plib); /* 5. Call meme */ /* ajFmtPrint("\n%S\n", cmd); */ system(ajStrGetPtr(cmd)); /* 6. Exit cleanly */ ajSeqsetDel(&dataset); ajStrDel(&cons); ajStrDel(&sf); ajStrDel(&mod); ajStrDel(&prior); ajStrDel(&spmap); ajStrDel(&cmd); ajStrDel(&ssname); ajStrDel(&tmp); ajFileClose(&bfile); ajFileClose(&plib); ajFileClose(&outtext); ajSeqoutDel(&outseq); embExit(); return 0; }
void embConsCalc(const AjPSeqset seqset,const AjPMatrix cmpmatrix, ajint nseqs, ajint mlen,float fplural,float setcase, ajint identity, AjBool gaps, AjPStr *cons) { ajint i; ajint j; ajint k; ajint **matrix; ajint m1 = 0; ajint m2 = 0; ajint highindex; ajint matsize; ajint matchingmaxindex; ajint identicalmaxindex; float max; float contri = 0; float contrj = 0; float *identical; float *matching; AjPSeqCvt cvt = 0; AjPFloat score = NULL; const char **seqcharptr; char res; char nocon = '-'; void *freeptr; matrix = ajMatrixGetMatrix(cmpmatrix); cvt = ajMatrixGetCvt(cmpmatrix); /* return conversion table */ matsize = ajMatrixGetSize(cmpmatrix); AJCNEW(seqcharptr,nseqs); AJCNEW(identical,matsize); AJCNEW(matching,matsize); score = ajFloatNew(); if(ajSeqsetIsNuc(seqset)) /* set non-consensus character */ nocon = 'N'; else if ( ajSeqsetIsProt(seqset)) nocon = 'X'; for(i=0;i<nseqs;i++) /* get sequence as string */ seqcharptr[i] = ajSeqsetGetseqSeqC(seqset, i); for(k=0; k< mlen; k++) { res = nocon; for(i=0;i<matsize;i++) /* reset id's and +ve matches */ { identical[i] = 0.0; matching[i] = 0.0; } for(i=0;i<nseqs;i++) ajFloatPut(&score,i,0.); for(i=0;i<nseqs;i++) /* generate score for columns */ { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(m1 || gaps) identical[m1] += ajSeqsetGetseqWeight(seqset,i); for(j=i+1;j<nseqs;j++) { m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[j][k]); if(m1 && m2) { contri = (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset,j) +ajFloatGet(score,i); contrj = (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset,i) +ajFloatGet(score,j); ajFloatPut(&score,i,contri); ajFloatPut(&score,j,contrj); } } } highindex = -1; max = -(float)INT_MAX; for(i=0;i<nseqs;i++) if( ajFloatGet(score,i) > max || (ajFloatGet(score,i) == max && seqcharptr[highindex][k] == '-') ) { highindex = i; max = ajFloatGet(score,i); } for(i=0;i<nseqs;i++) /* find +ve matches in the column */ { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(!matching[m1]) for(j=0;j<nseqs;j++) { /* // if( i != j) // { // m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); // if(m1 && m2 && matrix[m1][m2] > 0) // matching[m1] += ajSeqsetGetseqWeight(seqset, j); // } */ m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2 && matrix[m1][m2] > 0) matching[m1] += ajSeqsetGetseqWeight(seqset, j); if(gaps && !m1 && !m2) matching[m1] += ajSeqsetGetseqWeight(seqset, j); } } matchingmaxindex = 0; /* get max matching and identical */ identicalmaxindex = 0; for(i=0;i<nseqs;i++) { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(identical[m1] > identical[identicalmaxindex]) identicalmaxindex = m1; } for(i=0;i<nseqs;i++) { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(matching[m1] > matching[matchingmaxindex]) matchingmaxindex = m1; else if(matching[m1] == matching[matchingmaxindex]) if(identical[m1] > identical[matchingmaxindex]) matchingmaxindex = m1; } /* plurality check */ m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][k]); /* if(matching[m1] >= fplural && seqcharptr[highindex][k] != '-') res = seqcharptr[highindex][k];*/ if(matching[m1] >= fplural) res = seqcharptr[highindex][k]; if(matching[m1]<= setcase) res = tolower((int)res); if(identity) /* if just looking for id's */ { j = 0; for(i=0;i<nseqs;i++) if(matchingmaxindex == ajSeqcvtGetCodeK(cvt,seqcharptr[i][k])) j++; if(j<identity) res = nocon; } ajStrAppendK(cons,res); } freeptr = (void *) seqcharptr; AJFREE(freeptr); AJFREE(matching); AJFREE(identical); ajFloatDel(&score); return; }