/* @func embIeppKNew **********************************************************
**
** Allocates an array of EMBIEPSIZE doubles and hands it to embIepPkRead
** to be filled in.
**
** @return [double*] Newly allocated array; it is returned without being
**                   freed, so the caller is responsible for releasing it.
******************************************************************************/

double* embIeppKNew(void)
{
    double *table = NULL;

    AJCNEW(table, EMBIEPSIZE);

    /* load the pK values into the fresh table */
    embIepPkRead(table);

    return table;
}
/* @func embIepIepC ***********************************************************
**
** Computes an isoelectric point for the sequence text 's' and stores it
** in '*iep'.
**
** The pK table is converted with embIepCalcK, the composition is gathered
** with embIepCompC and the value is obtained from embIepPhConverge.
**
** @param [r] s         [const char*] Sequence text
** @param [r] amino     [ajint] Passed through to embIepCompC
** @param [r] carboxyl  [ajint] Passed through to embIepCompC
** @param [r] sscount   [ajint] Passed through to embIepCompC
** @param [r] modlysine [ajint] Passed through to embIepCompC
** @param [u] pK        [double*] pK table handed to embIepCalcK
** @param [w] iep       [double*] Receives the result (0.0 on failure)
** @param [r] termini   [AjBool] If false the amino and carboxyl counts
**                               are zeroed before the calculation
** @return [AjBool] ajFalse when the computed value is zero, else ajTrue
******************************************************************************/

AjBool embIepIepC(const char *s, ajint amino, ajint carboxyl,
                  ajint sscount, ajint modlysine,
                  double *pK, double *iep, AjBool termini)
{
    ajint  *comp    = NULL;
    ajint  *scratch = NULL;
    double *dissoc  = NULL;
    double *proton  = NULL;

    *iep = 0.0;

    AJCNEW(comp,    EMBIEPSIZE);
    AJCNEW(scratch, EMBIEPSIZE);
    AJCNEW(dissoc,  EMBIEPSIZE);
    AJCNEW(proton,  EMBIEPSIZE);

    /* pK values -> dissociation constants */
    embIepCalcK(dissoc, pK);

    /* composition of the sequence */
    embIepCompC(s, amino, carboxyl, sscount, modlysine, comp);

    if(!termini)
    {
        comp[EMBIEPCARBOXYL] = 0;
        comp[EMBIEPAMINO]    = 0;
    }

    *iep = embIepPhConverge(comp, dissoc, scratch, proton);

    AJFREE(proton);
    AJFREE(dissoc);
    AJFREE(scratch);
    AJFREE(comp);

    return (*iep != 0.0) ? ajTrue : ajFalse;
}
/* @func ajRegExecallC ********************************************************
**
** Runs pcre_dfa_exec for the compiled expression 'prog' against the C
** string 'str' and records the returned status in prog->matches.
**
** The shared workspace regDfaWorkspace is allocated on first use.
**
** @param [u] prog [AjPRegexp] Compiled expression; matches, ovector and
**                             orig are updated
** @param [r] str  [const char*] Text to search
** @return [AjBool] ajTrue when the status is zero or positive
******************************************************************************/

AjBool ajRegExecallC(AjPRegexp prog, const char* str)
{
    const int offset = 0;
    const int flags  = 0;

    if(!regDfaWorkspace)
        AJCNEW(regDfaWorkspace, regDfaWsCount);

    prog->matches = pcre_dfa_exec(prog->pcre, prog->extra,
                                  str, strlen(str),
                                  offset, flags,
                                  prog->ovector, 3*prog->ovecsize,
                                  regDfaWorkspace, regDfaWsCount);

    if(prog->matches < 0)
    {
        /* -1 is a plain "no match"; anything lower is unexpected */
        /* (recursion limits etc.) and is traced for diagnosis    */
        if(prog->matches < -1)
        {
            ajDebug("ajRegExecallC returned unexpected status '%d'\n",
                    prog->matches);
            prog->orig = str;           /* the trace needs the text */
            ajRegTrace(prog);
        }

        prog->orig = NULL;

        return ajFalse;
    }

    prog->orig = str;

    /* a zero status is reported as too many substrings */
    if(!prog->matches)
        ajWarn("ajRegExecallC too many substrings");

    return ajTrue;
}
int main(int argc, char **argv) { AjPAlign align; AjPSeq a; AjPSeq b; AjPSeqout seqout; AjPStr m; AjPStr n; AjPStr merged = NULL; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; float *path; ajint *compass; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; ajulong maxarr = 1000; ajulong len; /* arbitrary. realloc'd if needed */ size_t stlen; float score; ajint begina; ajint beginb; embInit("merger", argc, argv); a = ajAcdGetSeq("asequence"); b = ajAcdGetSeq("bsequence"); seqout = ajAcdGetSeqout("outseq"); matrix = ajAcdGetMatrixf("datafile"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW(path, maxarr); AJCNEW(compass, maxarr); /* ** make the two sequences lowercase so we can show which one we are ** using in the merge by uppercasing it */ ajSeqFmtLower(a); ajSeqFmtLower(b); m = ajStrNew(); n = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); begina = ajSeqGetBegin(a); beginb = ajSeqGetBegin(b); lena = ajSeqGetLen(a); lenb = ajSeqGetLen(b); if(lenb > (ULONG_MAX/(ajulong)(lena+1))) ajFatal("Sequences too big. 
Try 'supermatcher'"); len = lena*lenb; if(len>maxarr) { ajDebug("merger: resize path, len to %d (%d * $d)\n", len, lena, lenb); stlen = (size_t) len; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&m,""); ajStrAssignC(&n,""); score = embAlignPathCalc(p,q,lena,lenb,gapopen,gapextend,path,sub,cvt, compass, ajFalse); /*score = embAlignScoreNWMatrix(path,compass,gapopen,gapextend, a,b,lena,lenb,sub,cvt, &start1,&start2);*/ embAlignWalkNWMatrix(path,a,b,&m,&n,lena,lenb, &start1,&start2,gapopen, gapextend,compass); /* ** now construct the merged sequence, uppercase the bits of the two ** input sequences which are used in the merger */ merger_Merge(align, &merged,p,q,m,n,start1,start2, ajSeqGetNameC(a),ajSeqGetNameC(b)); embAlignReportGlobal(align, a, b, m, n, start1, start2, gapopen, gapextend, score, matrix, begina, beginb); ajAlignWrite(align); ajAlignReset(align); /* write the merged sequence */ ajSeqAssignSeqS(a, merged); ajSeqoutWriteSeq(seqout, a); ajSeqoutClose(seqout); ajSeqoutDel(&seqout); ajSeqDel(&a); ajSeqDel(&b); ajAlignClose(align); ajAlignDel(&align); ajStrDel(&merged); AJFREE(compass); AJFREE(path); ajStrDel(&n); ajStrDel(&m); embExit(); return 0; }
static EmbPEntry dbiblast_nextblastentry(PBlastDb db, ajint ifile, const AjPStr idformat, AjBool systemsort, AjPStr const * fields, ajint * maxFieldLen, ajuint* maxidlen, ajuint* countfield, AjPFile elistfile, AjPFile * alistfile) { ajint i; static ajint lastfile = -1; static ajint iparser = -1; static ajint called = 0; static ajuint tabhdr[TABLESIZE]; static ajint iload = TABLESIZE-1; static ajint irest = 0; static ajint ipos = 0; static ajint jpos = 0; ajint ir; ajint j; static ajint is = 0; char* token; static ajint nfields; ajint ifield; if(!called) { for(i=0; parser[i].Name; i++) if(ajStrMatchC(idformat, parser[i].Name)) { iparser = i; break; } if(iparser < 0) ajFatal("idformat '%S' unknown", idformat); ajDebug("idformat '%S' Parser %d\n", idformat, iparser); ajStrSetRes(&id, HDRSIZE); ajStrSetRes(&acc, HDRSIZE); ajStrSetRes(&hline, HDRSIZE); called = 1; } if(!fdl) { nfields=0; while(fields[nfields]) nfields++; if(nfields) AJCNEW(fdl, nfields); for(i=0; i < nfields; i++) fdl[i] = ajListNew(); } if(lastfile != ifile) { lastfile = ifile; ipos = 1; /* isize = 0;*/ irest = 0; iload = TABLESIZE-1; } if(!dbiblastEntry || !systemsort) dbiblastEntry = embDbiEntryNew(nfields); /* pick up the next entry, parse it and dump it */ if(ipos > db->Size) return NULL; if( ipos >= irest) { ajDebug("ipos: %d iload: %d irest: %d\n", ipos, iload, irest); irest = ipos + TABLESIZE - 2; if(irest > db->Size) { iload = db->Size - ipos + 1; irest = db->Size; } jpos=0; j = dbiblast_loadtable(tabhdr, iload, db, db->TopHdr, ipos-1); if(!j) ajDebug("No elements read"); } j = dbiblast_ncblreadhdr(&hline, db, tabhdr[jpos], tabhdr[jpos+1]); if(!parser[iparser].Parser(hline, alistfile, systemsort, fields, maxFieldLen, countfield, &id, fdl)) ajFatal("failed to parse '%S'", hline); ir = ipos; if(ajStrGetLen(id) > *maxidlen) *maxidlen = ajStrGetLen(id); if(systemsort) ajFmtPrintF(elistfile, "%S %d %d %d\n", id, ir, is, ifile+1); else { dbiblastEntry->entry = ajCharNewS(id); dbiblastEntry->rpos = 
ir; dbiblastEntry->spos = is; dbiblastEntry->filenum = ifile+1; /* field tokens as list, then move to dbiblastEntry->field */ for(ifield=0; ifield < nfields; ifield++) { dbiblastEntry->nfield[ifield] = ajListGetLength(fdl[ifield]); if(dbiblastEntry->nfield[ifield]) { AJCNEW(dbiblastEntry->field[ifield], dbiblastEntry->nfield[ifield]); i = 0; while(ajListPop(fdl[ifield], (void**) &token)) dbiblastEntry->field[ifield][i++] = token; } else dbiblastEntry->field[ifield] = NULL; } } ipos++; jpos++; return dbiblastEntry; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajint blastv = 0; char dbtype = '\0'; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPStr version = NULL; AjPStr seqtype = NULL; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjBool usesrc = AJTRUE; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPStr idformat = NULL; EmbPEntry entry; PBlastDb db = NULL; ajuint idCount = 0; ajuint idDone; AjPList listTestFiles = NULL; void ** testFiles = NULL; ajuint nfiles; ajuint ifile; ajuint jfile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i = 0; embInit("dbiblast", argc, argv); idformat = ajStrNewC("NCBI"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); version = ajAcdGetListSingle("blastversion"); seqtype = ajAcdGetListSingle("seqtype"); usesrc = ajAcdGetBoolean("sourcefile"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) 
maxFieldLen[ifield] = (ajint) maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); } } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); if(ajUtilGetBigendian()) readReverse = ajFalse; else readReverse = ajTrue; ajStrToInt(version, &blastv); dbtype = ajStrGetCharFirst(seqtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listTestFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listTestFiles, ajStrVcmp); nfiles = ajListToarray(listTestFiles, &testFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) testFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ jfile = 0; for(ifile=0; ifile < nfiles; ifile++) { curfilename = (AjPStr) testFiles[ifile]; if(!dbiblast_blastopenlib(curfilename, usesrc, blastv, dbtype, &db)) continue; /* could be the wrong file type with "*.*" */ ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%S' ...\n", db->TFile->Name); ajStrAssignS(&divfiles[jfile], db->TFile->Name); ajFilenameTrimPath(&divfiles[jfile]); if(ajStrGetLen(divfiles[jfile]) >= maxfilelen) maxfilelen = ajStrGetLen(divfiles[jfile]) + 1; if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, jfile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbiblast_nextblastentry(db, jfile, idformat, systemsort, fields, maxFieldLen, &maxidlen, countField, 
elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ { embDbiMemEntry(idlist, fieldList, nfields, entry, jfile); } } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); /* lost the entry, so can't free it :-) */ } embDbiLogFile(logfile, curfilename, idCountFile, fields, countField, nfields); dbiblast_dbfree(&db); jfile++; } nfiles = jfile; /* ** write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* ** Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* ** Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajListMap(idlist, embDbiEntryDelMap, NULL); ajListFree(&idlist); AJFREE(entryIds); ajStrDelarray(&fields); for(i=0;i<nfields;i++) { if(systemsort) 
{ ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); ajStrDel(&version); ajStrDel(&seqtype); ajFileClose(&elistfile); for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(countField); AJFREE(fieldTot); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&idformat); ajStrDel(&tmpfname); AJFREE(maxFieldLen); ajFileClose(&logfile); ajListstrFreeData(&listTestFiles); ajStrDel(&t); ajStrDel(&id); ajStrDel(&acc); ajStrDel(&hline); ajStrDel(&tmpdes); ajStrDel(&tmpfd); ajStrDel(&tmpgi); ajStrDel(&tmpdb); ajStrDel(&tmpac); ajStrDel(&tmpsv); ajRegFree(&wrdexp); embDbiEntryDel(&dbiblastEntry); if(fdl) { for(i=0; i < nfields; i++) ajListFree(&fdl[i]); AJFREE(fdl); } for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(testFiles); embExit(); return 0; }
int main(int argc, char **argv) { ajint i; ajint numseq; ajint j = 0; ajint numres; ajint count; ajint k; ajint kmax; float defheight; float currentscale; AjPStr shade = NULL; AjPFloat pair = NULL; AjPGraph graph = NULL; AjPMatrix cmpmatrix = NULL; AjPSeqCvt cvt = NULL; AjPStr matcodes = NULL; AjBool consensus; AjBool colourbyconsensus; AjBool colourbyresidues; AjBool colourbyshade = AJFALSE; AjBool boxit; AjBool boxcol; AjBool portrait; AjBool collision; ajint identity; AjBool listoptions; ajint alternative; AjPStr altstr = NULL; AjPStr sidentity = NULL; AjPStr ssimilarity = NULL; AjPStr sother = NULL; AjPStr sboxcolval = NULL; AjPStr options = NULL; /* ajint showscore = 0; */ ajint iboxcolval = 0; ajint cidentity = RED; ajint csimilarity = GREEN; ajint cother = BLACK; float fxp; float fyp; float yincr; float y; ajint ixlen; ajint iylen; ajint ixoff; ajint iyoff; char res[2] = " "; float *score = 0; float scoremax = 0; float *identical = NULL; ajint identicalmaxindex; float *matching = NULL; ajint matchingmaxindex; float *colcheck = NULL; ajint **matrix; ajint m1 = 0; ajint m2 = 0; ajint ms = 0; ajint highindex = 0; ajint myindex; ajint *previous = 0; AjBool iscons = ajFalse; ajint currentstate = 0; ajint oldfg = 0; float fold = 0.0; ajint *colmat = 0; ajint *shadecolour = 0; /* float identthresh = 1.5; */ /* float simthresh = 1.0; */ /* float relthresh = 0.5; */ float part = 0.0; const char *cptr; ajint resbreak; float fplural; float ystart; float xmin; float xmax; float xmid; AjPTime ajtime; ajint gapcount = 0; ajint countforgap = 0; ajint boxindex; float max; ajint matsize; ajint seqperpage = 0; ajint startseq; ajint endseq; ajint newILend = 0; ajint newILstart; void *freeptr; ajint itmp; embInit("prettyplot", argc, argv); seqset = ajAcdGetSeqset("sequences"); numres = ajAcdGetInt("residuesperline"); resbreak = ajAcdGetInt("resbreak"); ajSeqsetFill(seqset); /* Pads sequence set with gap characters */ numseq = ajSeqsetGetSize(seqset); graph = 
ajAcdGetGraph("graph"); colourbyconsensus = ajAcdGetBoolean("ccolours"); colourbyresidues = ajAcdGetBoolean("docolour"); shade = ajAcdGetString("shade"); pair = ajAcdGetArray("pair"); identity = ajAcdGetInt("identity"); boxit = ajAcdGetBoolean("box"); ajtime = ajTimeNewTodayFmt("daytime"); ajSeqsetTrim(seqset); /* offset = ajSeqsetGetOffset(seqset); Unused */ ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); if(boxit) { AJCNEW(seqboxptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqboxptr[i], ajSeqsetGetLen(seqset)); } boxcol = ajAcdGetBoolean("boxcol"); sboxcolval = ajAcdGetString("boxuse"); if(boxcol) { iboxcolval = ajGraphicsCheckColourS(sboxcolval); if(iboxcolval == -1) iboxcolval = GREY; } consensus = ajAcdGetBoolean("consensus"); if(consensus) { AJCNEW(constr, ajSeqsetGetLen(seqset)+1); constr[0] = '\0'; } shownames = ajAcdGetBoolean("name"); shownumbers = ajAcdGetBoolean("number"); charlen = ajAcdGetInt("maxnamelen"); fplural = ajAcdGetFloat("plurality"); portrait = ajAcdGetBoolean("portrait"); collision = ajAcdGetBoolean("collision"); listoptions = ajAcdGetBoolean("listoptions"); altstr = ajAcdGetListSingle("alternative"); cmpmatrix = ajAcdGetMatrix("matrixfile"); ajStrToInt(altstr, &alternative); matrix = ajMatrixGetMatrix(cmpmatrix); cvt = ajMatrixGetCvt(cmpmatrix); matsize = ajMatrixGetSize(cmpmatrix); AJCNEW(identical,matsize); AJCNEW(matching,matsize); AJCNEW(colcheck,matsize); numgaps = numres/resbreak; numgaps--; if(portrait) { ajGraphicsSetPortrait(1); ystart = (float) 75.0; } else ystart = (float) 75.0; /* pair is an array of three non-negative floats */ /* identthresh = ajFloatGet(pair,0); Unused */ /* simthresh = ajFloatGet(pair,1); Unused */ /* relthresh = ajFloatGet(pair,2); Unused */ /* ** shade is a formatted 4-character string. Characters BLPW only. ** controlled by a pattern in ACD. 
*/ if(ajStrGetLen(shade)) { AJCNEW(shadecolour,4); cptr = ajStrGetPtr(shade); for(i=0;i<4;i++){ if(cptr[i]== 'B' || cptr[i]== 'b') shadecolour[i] = BLACK; else if(cptr[i]== 'L' || cptr[i]== 'l') shadecolour[i] = BROWN; else if(cptr[i]== 'P' || cptr[i]== 'p') shadecolour[i] = WHEAT; else if(cptr[i]== 'W' || cptr[i]== 'w') shadecolour[i] = WHITE; } colourbyconsensus = colourbyresidues = ajFalse; colourbyshade = ajTrue; } /* ** we can colour by consensus or residue but not both ** if we have to choose, use the consensus */ if(colourbyconsensus && colourbyresidues) colourbyconsensus = AJFALSE; sidentity = ajAcdGetString("cidentity"); ssimilarity = ajAcdGetString("csimilarity"); sother = ajAcdGetString("cother"); if(colourbyconsensus) { cidentity = ajGraphicsCheckColourS(sidentity); if(cidentity == -1) cidentity = RED; csimilarity = ajGraphicsCheckColourS(ssimilarity); if(csimilarity == -1) csimilarity = GREEN; cother = ajGraphicsCheckColourS(sother); if(cother == -1) cother = BLACK; } else if(colourbyresidues) { matcodes = ajMatrixGetCodes(cmpmatrix); if(ajSeqsetIsProt(seqset)) colmat = ajGraphicsBasecolourNewProt(matcodes); else colmat = ajGraphicsBasecolourNewNuc(matcodes); } /* output the options used as the subtitle for the bottom of the graph */ if(listoptions) { ajStrAssignC(&options,""); ajFmtPrintAppS(&options,"-plurality %.1f",fplural); if(collision) ajStrAppendC(&options," -collision"); else ajStrAppendC(&options," -nocollision"); if(boxit) ajStrAppendC(&options," -box"); else ajStrAppendC(&options," -nobox"); if(boxcol) ajStrAppendC(&options," -boxcol"); else ajStrAppendC(&options," -noboxcol"); if(colourbyconsensus) ajStrAppendC(&options," -colbyconsensus"); else if(colourbyresidues) ajStrAppendC(&options," -colbyresidues"); else if(colourbyshade) ajStrAppendC(&options," -colbyshade"); else ajStrAppendC(&options," -nocolour"); if(alternative==2) ajStrAppendC(&options," -alt 2"); else if(alternative==1) ajStrAppendC(&options," -alt 1"); else 
if(alternative==3) ajStrAppendC(&options," -alt 3"); } AJCNEW(seqcolptr, numseq); for(i=0;i<numseq;i++) AJCNEW(seqcolptr[i], ajSeqsetGetLen(seqset)); AJCNEW(seqcharptr, numseq); AJCNEW(seqnames, numseq); AJCNEW(score, numseq); AJCNEW(previous, numseq); AJCNEW(seqcount, numseq); for(i=0;i<numseq;i++) { ajSeqsetFmtUpper(seqset); seqcharptr[i] = ajSeqsetGetseqSeqC(seqset, i); seqnames[i] = 0; ajStrAppendS(&seqnames[i],ajSeqsetGetseqNameS(seqset, i)); ajStrTruncateLen(&seqnames[i],charlen); previous[i] = 0; seqcount[i] = 0; } /* ** user will pass the number of residues to fit a page ** therefore we now need to calculate the size of the chars ** based on this and get the new char width. ** 'charlen' maximum characters for the name (truncated above) */ ajGraphicsGetCharsize(&defheight,¤tscale); xmin = -charlen - (float)2.0; xmax = (float)numres+(float)11.0+(float)(numres/resbreak); xmid = (xmax + xmin)/(float)2.0; ajGraphOpenWin(graph, xmin, xmax, (float)0.0, ystart+(float)1.0); ajGraphGetParamsPage(graph, &fxp,&fyp,&ixlen,&iylen,&ixoff,&iyoff); if(portrait) { itmp = ixlen; ixlen = iylen; iylen = itmp; } ajGraphicsGetCharsize(&defheight,¤tscale); ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen+1)* (currentscale * (float) 1.5)))/ currentscale); /* ajGraphicsSetCharscale(((float)ixlen/((float)(numres+charlen)* (currentscale+(float)1.0)))/ currentscale); */ ajGraphicsGetCharsize(&defheight,¤tscale); yincr = (currentscale + (float)3.0)*(float)0.3; /* ** If we have titles (now the standard graph title and subtitle and footer) ** leave 7 rows of space for them */ y=ystart-(float)7.0; if(ajStrGetLen(options)) { fold = ajGraphicsSetCharscale(1.0); ajGraphicsDrawposTextAtmid(xmid,2.0, ajStrGetPtr(options)); ajGraphicsSetCharscale(fold); } /* if sequences per page not set then calculate it */ if(!seqperpage) { seqperpage = prettyplot_calcseqperpage(yincr,y,consensus); if(seqperpage>numseq) seqperpage=numseq; } count = 0; /* ** for boxes we need to set a foreground 
colour for the box lines ** and save the current foreground colour */ if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(iboxcolval); /* ** step through each residue position */ kmax = ajSeqsetGetLen(seqset) - 1; for(k=0; k<= kmax; k++) { /* reset column score array */ for(i=0;i<numseq;i++) score[i] = 0.0; /* reset matrix character testing arrays */ for(i=0;i<matsize;i++) { identical[i] = 0.0; matching[i] = 0.0; colcheck[i] = 0.0; } /* generate a score for this residue in each sequence */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2) score[i] += (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset, j); } if(m1) identical[m1] += ajSeqsetGetseqWeight(seqset, i); } /* find the highest score */ highindex = -1; scoremax = INT_MIN; /*ajDebug("Scores at position %d:\n", k);*/ for(i=0;i<numseq;i++) { /*ajDebug(" seq %d: '%c' %f\n",i,seqcharptr[i][k],score[i]);*/ if(score[i] > scoremax) { scoremax = score[i]; highindex = i; } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(!matching[m1]) { for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2 && matrix[m1][m2] > 0) matching[m1] += ajSeqsetGetseqWeight(seqset, j); } } } /* find highs for matching and identical */ matchingmaxindex = 0; identicalmaxindex = 0; for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] > identical[identicalmaxindex]) identicalmaxindex = m1; } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > matching[matchingmaxindex]) matchingmaxindex = m1; else if(matching[m1] == matching[matchingmaxindex]) { if(identical[m1] > identical[matchingmaxindex]) matchingmaxindex= m1; } } iscons = ajFalse; boxindex = -1; max = -3; ajDebug("k:%2d highindex:%2d matching:%4.2f\n", k, highindex, matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]); if(highindex != -1 && 
matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(matching[m1] > max) { max = matching[m1]; highindex = i; } else if(matching[m1] == max) { if(identical[m1] > identical[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] ) { max = matching[m1]; highindex = i; } } } if(matching[ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])] >= fplural) { iscons = ajTrue; boxindex = highindex; } } if(iscons) { if(!collision) { /* check for collisions */ if(alternative == 1) { /* check to see if this is unique for collisions */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; } /*ajDebug("after (alt=1) iscons: %B",iscons);*/ } else if(alternative == 2) { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)|| (identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex)) iscons = ajFalse; } } else if(alternative == 3) { /* ** to do this check one is NOT in consensus to see if ** another score of fplural has been found */ ms = ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k]); for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(ms != m1 && colcheck[m1] == 0.0) /* NOT in the current consensus */ for(j=0;j<numseq;j++) { m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if( matrix[ms][m2] < 0.1) { /* NOT in the current consensus */ if( matrix[m1][m2] > 0.1) colcheck[m1] += ajSeqsetGetseqWeight(seqset, j); } } } for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); /* if any other matches then we have a collision */ if(colcheck[m1] >= fplural) iscons = ajFalse; } /*ajDebug("after alt=2 iscons: %B", iscons);*/ } else { for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, 
seqcharptr[i][k]); if((matching[m1] >= matching[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] < 0.1)) iscons = ajFalse; if(identical[m1] >= identical[matchingmaxindex] && m1 != matchingmaxindex && matrix[m1][matchingmaxindex] > 0.1) iscons = ajFalse; } if(!iscons) { /* matches failed try identicals */ if(identical[identicalmaxindex] >= fplural) { iscons = ajTrue; /* ** if nothing has an equal or higher match that ** does not match highest then false */ for(i=0;i<numseq;i++) { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(identical[m1] >= identical[identicalmaxindex] && m1 != identicalmaxindex) iscons = ajFalse; else if(matching[m1] >= matching[identicalmaxindex] && matrix[m1][matchingmaxindex] <= 0.0) iscons = ajFalse; else if(m1 == identicalmaxindex) j = i; } if(iscons) highindex = j; } } } } if(identity) { j = 0; for(i=0;i<numseq;i++) if(seqcharptr[highindex][k] == seqcharptr[i][k]) j++; if(j<identity) iscons = ajFalse; } } /* ** Done a full line of residues ** Boxes have been defined up to this point */ if(count >= numres ) { /* check y position for next set */ y=y-(yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))); if(y<yincr*((float)numseq+(float)2.0+((float)consensus*(float)2))) { /* full page - print it */ y=ystart-(float)6.0; startseq = 0; endseq = seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { /* AJB */ /*if(startseq != 0) ajGraphNewpage(graph, AJFALSE);*/ /*ajDebug("Inner loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq=numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; ajGraphNewpage(graph, AJFALSE); } } count = 0; gapcount = 0; } count++; countforgap++; for(j=0;j<numseq;j++) { /* START OF BOXES */ if(boxit) { seqboxptr[j][k] = 0; if(boxindex!=-1) { myindex = boxindex; 
if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0) part = 1.0; else { if(identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural) part = 1.0; else part = 0.0; } if(previous[j] != part) /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; if(j==0) { /* special case for horizontal line */ if(part) { currentstate = 1; /* draw hori line */ seqboxptr[j][k] |= BOXTOP; } else currentstate = 0; } else { /* j != 0 Normal case for horizontal line */ if(part != currentstate) { /*draw hori line */ seqboxptr[j][k] |= BOXTOP; currentstate = (ajint) part; } } if(j== numseq-1 && currentstate) /* draw horiline at bottom */ seqboxptr[j][k] |= BOXBOT; previous[j] = (ajint) part; } else { part = 0; if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXLEF; } previous[j] = 0; } if(count == numres || k == kmax || countforgap >= resbreak ) { /* last one on the row or a break*/ if(previous[j]) { /* draw vertical line */ seqboxptr[j][k] |= BOXRIG; } previous[j] = 0; } } /* end box */ if(boxit && boxcol) if(boxindex != -1) { myindex = boxindex; if(matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[myindex][k])] > 0 || identical[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] >= fplural ) seqboxptr[j][k] |= BOXCOLOURED; } /* END OF BOXES */ if(ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])) res[0] = seqcharptr[j][k]; else res[0] = '-'; if(colourbyconsensus) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(iscons && seqcharptr[highindex][k] == seqcharptr[j][k]) seqcolptr[j][k] = cidentity; else if(part > 0.0) seqcolptr[j][k] = csimilarity; else seqcolptr[j][k] = cother; } else if(colourbyresidues) seqcolptr[j][k] = colmat[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])]; else if(iscons && colourbyshade) { part = (float) matrix[ajSeqcvtGetCodeK(cvt, seqcharptr[j][k])] [ajSeqcvtGetCodeK(cvt, seqcharptr[highindex][k])]; if(part >= 1.5) 
seqcolptr[j][k] = shadecolour[0]; else if(part >= 1.0) seqcolptr[j][k] = shadecolour[1]; else if(part >= 0.5) seqcolptr[j][k] = shadecolour[2]; else seqcolptr[j][k] = shadecolour[3]; } else if(colourbyshade) seqcolptr[j][k] = shadecolour[3]; else seqcolptr[j][k] = BLACK; } if(consensus) { if(iscons) res[0] = seqcharptr[highindex][k]; else res[0] = '-'; strcat(constr,res); } if(countforgap >= resbreak) { gapcount++; countforgap=0; } } startseq = 0; endseq=seqperpage; newILstart = newILend; newILend = k; while(startseq < numseq) { if(startseq) ajGraphNewpage(graph, AJFALSE); /*ajDebug("Final loop: startseq: %d numseq: %d endseq: %d\n", startseq, numseq, endseq);*/ if(endseq>numseq) endseq = numseq; prettyplot_fillinboxes(numseq,ajSeqsetGetLen(seqset), startseq,endseq, newILstart,newILend, numres,resbreak, boxit,boxcol,consensus, ystart,yincr,cvt); startseq = endseq; endseq += seqperpage; } ajGraphicsGetCharsize(&defheight,¤tscale); if(boxit && boxcol) oldfg = ajGraphicsSetFgcolour(oldfg); ajGraphicsCloseWin(); ajGraphxyDel(&graph); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajStrDel(&options); ajStrDel(&altstr); ajStrDel(&matcodes); for(i=0;i<numseq;i++) { ajStrDel(&seqnames[i]); AJFREE(seqcolptr[i]); if(seqboxptr) AJFREE(seqboxptr[i]); } AJFREE(seqcolptr); AJFREE(seqboxptr); AJFREE(seqnames); AJFREE(score); AJFREE(previous); AJFREE(seqcount); AJFREE(colmat); AJFREE(shadecolour); freeptr = (void *) seqcharptr; AJFREE(freeptr); AJFREE(identical); AJFREE(matching); AJFREE(colcheck); ajSeqsetDel(&seqset); ajMatrixDel(&cmpmatrix); ajStrDel(&shade); ajStrDel(&sboxcolval); ajStrDel(&sidentity); ajStrDel(&ssimilarity); ajStrDel(&sother); ajFloatDel(&pair); ajTimeDel(&ajtime); AJFREE(constr); embExit(); return 0; }
/* @funcstatic newcoils_pred_coils ********************************************
**
** Scores every residue of 'seq' in sliding windows of 'win' residues
** against the heptad preference table 'h', converts each score into a
** probability P and writes the result to 'outf' in the layout selected
** by 'mode'.
**
** For each window the score is the weighted geometric mean of the table
** values (h->smallest is used where the table holds -1). Each residue
** keeps the best window score it took part in, together with the heptad
** position ('a' + position) it had in that window.
**
** P is Gcc/(sc*Gg+Gcc), where Gcc and Gg are evaluated from the mean and
** standard deviation pairs (m_cc, sd_cc) and (m_g, sd_g) of h->f[which].
**
** @param [u] outf [AjPFile] Output file
** @param [r] seq [const char*] Sequence text; only characters 'A'..'Z'
**                              whose NCAAs entry is not '_' are scored
** @param [r] ident [const char*] Identifier printed in modes 1 and 2
** @param [r] str [const AjPStr] Text printed after the identifier
** @param [r] h [const struct hept_pref *] Heptad preference table
** @param [r] win [ajint] Window length in residues
** @param [r] which [ajint] Index of the parameter set in h->f
** @param [r] weighted [ajint] If non-zero, heptad positions 0 and 3 are
**                             given a power of 2.5 instead of 1.0
** @param [r] mode [ajint] 0: one line per residue with scores;
**                         1: sequence with residues at or above min_P
**                            replaced by 'x', 60 per line;
**                         2: one summary line when enough segments exist
** @param [r] min_P [float] Probability threshold
** @param [w] t [ajint*] Incremented once for every residue
** @param [w] tc [ajint*] Incremented for every residue with P >= min_P
** @param [r] min_seg [ajint] Minimum number of segments for the mode 2
**                            summary to be written
** @return [void]
******************************************************************************/

static void newcoils_pred_coils(AjPFile outf, const char *seq,
                                const char *ident,
                                const AjPStr str,
                                const struct hept_pref *h,
                                ajint win, ajint which, ajint weighted,
                                ajint mode, float min_P,
                                ajint *t, ajint *tc, ajint min_seg)
{
    ajint i;
    ajint j;
    ajint coillen = 0;
    ajint len;
    ajint pos;
    ajint aa_pt;
    ajint total_coil_segments;
    ajint are_there_coils;

    float actual_win;
    float this_score;
    float Gg;
    float Gcc;
    float power;
    float t1;
    float t2;
    float t3;
    float t4;
    float *score;
    float *P;

    char *hept_seq;

    len = strlen(seq);

    AJCNEW(score, len);
    AJCNEW(P, len);
    AJCNEW(hept_seq, len);

    for(i=0; i<len; ++i)
    {
        P[i] = 0.0;
        score[i] = 0.0;
        hept_seq[i] = 'x';
    }

    for(i=0; i<(len-win+1); ++i)
    {
        this_score = 1.0;
        actual_win = 0.0;

        /* product of the table values over the window */
        for(j=0; ((j<win) && ((i+j)<len)); ++j)
        {
            aa_pt = (int)(seq[i+j]-'A');

            if((aa_pt>=0) && (aa_pt<26) && (NCAAs[aa_pt]!='_'))
            {
                pos = j%NCHEPTAD;   /* position in the heptad */

                if(weighted && (pos==0 || pos==3))
                    power = 2.5;
                else
                    power = 1.0;

                actual_win += power;

                if(h->m[aa_pt][pos]!=-1)
                    this_score *= pow(h->m[aa_pt][pos],power);
                else
                    this_score *= pow(h->smallest,power);
            }
        }

        /* root by the summed powers: a weighted geometric mean */
        if(actual_win>0)
            this_score = pow(this_score,(1/(float)actual_win));
        else
            this_score = 0;

        /* each residue keeps the best window score it was part of */
        for(j=0; ((j<win) && ((i+j)<len)); ++j)
        {
            aa_pt = (int)(seq[i+j]-'A');

            if((aa_pt>=0) && (aa_pt<26) && (NCAAs[aa_pt]!='_'))
            {
                pos = j%NCHEPTAD;   /* position in the heptad */

                if(this_score>score[i+j])
                {
                    score[i+j] = this_score;
                    hept_seq[i+j] = 'a'+pos;
                }
            }
        }
    }

    if(mode==1)
        ajFmtPrintF(outf,">%s %S\n",ident,str);

    are_there_coils = 0;
    total_coil_segments = 0;

    for(i=0; i<len; ++i)
    {
        /* Calculate P */
        t1 = 1/(h->f[which].sd_cc);
        t2 = (score[i]-(h->f[which].m_cc))/h->f[which].sd_cc;
        t3 = fabs(t2);
        t4 = t3*t3;
        Gcc = t1 * exp(-0.5*t4);

        t1 = 1/(h->f[which].sd_g);
        t2 = (score[i]-(h->f[which].m_g))/h->f[which].sd_g;
        t3 = fabs(t2);
        t4 = t3 * t3;
        Gg = t1 * exp(-0.5*t4);

        P[i] = Gcc/(h->f[which].sc*Gg+Gcc);

        if(P[i] >= min_P)
        {
            ++coillen;
            are_there_coils = 1;

            /* a segment starts where the previous residue was below */
            if((i==0) || (P[i-1]<min_P))
                total_coil_segments++;

            (*tc)++;
        }

        (*t)++;

        if(mode==1)
        {
            if(P[i] >= min_P)
                ajFmtPrintF(outf,"x");
            else
                ajFmtPrintF(outf,"%c",seq[i]);

            if(((i+1)%60)==0)
                ajFmtPrintF(outf,"\n");
        }
        else if(mode == 0)
            ajFmtPrintF(outf,"%4d %c %c %7.3f %7.3f (%-7.3f %7.3f)\n",i+1,
                        seq[i],hept_seq[i],score[i],P[i],Gcc,Gg);
    }

    if(mode==1)
        ajFmtPrintF(outf,"\n");

    if((mode==2) && (are_there_coils==1) && (total_coil_segments >= min_seg))
    {
        if(total_coil_segments==1)
            ajFmtPrintF(outf,"Pred %4d coil segment length:%d : %s %S\n",
                        total_coil_segments,coillen,ident,str);
        else
            ajFmtPrintF(outf,"Pred %4d coil segments Total length:%d: %s %S\n",
                        total_coil_segments,coillen,ident,str);
    }

    AJFREE(P);
    AJFREE(score);
    AJFREE(hept_seq);

    return;
}
static AjBool ssematch_NWScore(AjPScop temp_scop, AjPSeq pseq, ajint mode, AjPMatrixf matrix, float gapopen, float gapextend) { ajint start1 =0; /* Start of seq 1, passed as arg but not used.*/ ajint start2 =0; /* Start of seq 2, passed as arg but not used.*/ ajint maxarr =300; /* Initial size for matrix. */ ajint len; ajint *compass; const char *p; /* Query sequence. */ const char *q; /* Subject sequence from scop object. */ float **sub; float id =0.; /* Passed as arg but not used here. */ float sim =0.; float idx =0.; /* Passed as arg but not used here. */ float simx =0.; /* Passed as arg but not used here. */ float *path; AjPStr pstr = NULL; /* m walk alignment for first sequence Passed as arg but not used here. */ AjPStr qstr = NULL; /* n walk alignment for second sequence Passed as arg but not used here. */ AjPSeq qseq = NULL; /* Subject sequence. */ ajint lenp; /* Length of query sequence. */ ajint lenq; /* Length of subject sequence. */ AjPSeqCvt cvt = 0; AjBool show = ajFalse; /*Passed as arg but not used here. */ AJCNEW(path, maxarr); AJCNEW(compass, maxarr); pstr = ajStrNew(); qstr = ajStrNew(); gapopen = ajRoundFloat(gapopen,8); gapextend = ajRoundFloat(gapextend,8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); /* Extract subject sequence from scop object, convert to 3 letter code. */ if (mode == 0) qseq = ssematch_convertbases(temp_scop->Sse); else if (mode == 1) qseq = ssematch_convertbases(temp_scop->Sss); lenp = ajSeqGetLen(pseq); /* Length of query sequence. */ lenq = ajSeqGetLen(qseq); /* Length of subject sequence. */ /* Start of main application loop */ /* Intitialise variables for use by alignment functions*/ len = (lenp * lenq); if(len>maxarr) { AJCRESIZE(path,len); AJCRESIZE(compass,len); maxarr=len; } p = ajSeqGetSeqC(pseq); q = ajSeqGetSeqC(qseq); ajStrAssignC(&pstr,""); ajStrAssignC(&qstr,""); /* Check that no sequence length is 0. 
*/ if((lenp == 0)||(lenq == 0)) { AJFREE(compass); AJFREE(path); ajStrDel(&pstr); ajStrDel(&qstr); } /* Call alignment functions. */ embAlignPathCalc(p,q,lenp,lenq, gapopen, gapextend,path,sub,cvt,compass,show); /*embAlignScoreNWMatrix(path,compass,gapopen,gapextend, pseq, qseq, lenp,lenq,sub,cvt, &start1,&start2);*/ embAlignWalkNWMatrix(path,pseq,qseq,&pstr,&qstr, lenp,lenq,&start1,&start2, gapopen,gapextend,compass); embAlignCalcSimilarity(pstr,qstr,sub,cvt,lenp, lenq,&id,&sim,&idx, &simx); /* Assign score. */ temp_scop->Score = sim; /* Tidy up */ AJFREE(compass); AJFREE(path); ajStrDel(&pstr); ajStrDel(&qstr); ajSeqDel(&qseq); /* Bye Bye */ return ajTrue; }
/*
** embMatProtReadInt
**
** Reads one record from the file and returns it as a newly allocated
** EmbPMatPrints object.
**
** Leading lines that are empty or start with '#', '!' or a newline are
** skipped. The first remaining line is copied to 'cod', the next to 'acc',
** the next is parsed as the unsigned count 'n', and the next is copied to
** 'tit'. For each of the n entries three numeric lines are read into len,
** thresh and max (in that order), followed by 26 rows of len[m]
** space-separated unsigned values. One further line is read and discarded
** at the end of the record.
**
** A row holding fewer than len[m] values leaves the remaining cells of that
** row as allocated by AJCNEW0 instead of passing a NULL token to sscanf.
**
** fp  Input file
**
** Returns the new object, or NULL when the file ends before a record
** starts.
*/
EmbPMatPrints embMatProtReadInt(AjPFile fp)
{
    EmbPMatPrints ret;
    AjPStr line;

    ajint i;
    ajuint j;
    ajuint m;
    const char *p;

    /* Seed with a comment so the skip loop reads at least one line */
    line = ajStrNewC("#");

    p = ajStrGetPtr(line);

    while(!*p || *p=='#' || *p=='!' || *p=='\n')
    {
        if(!ajReadlineTrim(fp,&line))
        {
            ajStrDel(&line);

            return NULL;
        }

        p = ajStrGetPtr(line);
    }

    ajDebug("embMatProtReadint starting\n");
    ajDebug ("Line: %S\n", line);

    AJNEW0 (ret);

    ret->cod = ajStrNew();
    ajStrAssignS(&ret->cod,line);

    ajReadlineTrim(fp,&line);
    ret->acc = ajStrNew();
    ajStrAssignS(&ret->acc,line);

    ajReadlineTrim(fp,&line);
    ajStrToUint(line,&ret->n);

    ajReadlineTrim(fp,&line);
    ret->tit = ajStrNew();
    ajStrAssignS(&ret->tit,line);

    ajDebug ("Lineb: %S\n", line);

    AJCNEW(ret->len, ret->n);
    AJCNEW(ret->max, ret->n);
    AJCNEW(ret->thresh, ret->n);
    AJCNEW(ret->matrix, ret->n);

    for(m=0;m<ret->n;++m)
    {
        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->len[m]);

        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->thresh[m]);

        ajReadlineTrim(fp,&line);
        ajStrToUint(line,&ret->max[m]);

        ajDebug ("m: %d/%d len:%d thresh:%d max:%d\n",
                 m, ret->n, ret->len[m], ret->thresh[m], ret->max[m]);

        for(i=0;i<26;++i)
        {
            AJCNEW0(ret->matrix[m][i], ret->len[m]);
            ajReadlineTrim(fp,&line);
            ajDebug ("Linec [%d][%d]: %S\n", m, i, line);
            p = ajStrGetPtr(line);

            for(j=0;j<ret->len[m];++j)
            {
                if(!j)
                    p = ajSysFuncStrtok(p," ");
                else
                    p = ajSysFuncStrtok(NULL," ");

                /* Row shorter than expected: no more tokens to parse */
                if(!p)
                    break;

                sscanf(p,"%u",&ret->matrix[m][i][j]);
            }
        }
    }

    /* Consume the line that follows the record */
    ajReadlineTrim(fp,&line);
    ajDebug ("Linec: %S\n", line);

    ajStrDel(&line);

    return ret;
}
int main(int argc, char **argv) { AjPSeqset seqset; const AjPSeq seq1; const AjPSeq seq2; ajint wordlen; AjPTable seq1MatchTable = NULL; AjPList matchlist ; AjPGraph graph = 0; ajuint i; ajuint j; float total=0; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,1500,10000,50000, 100000,500000,1000000,5000000 }; ajint numbofticks = 10; ajint gap,tickgap; AjBool boxit = AJTRUE; AjBool dumpfeat = AJFALSE; float xmargin; float ymargin; float k; char ptr[10]; float ticklen; float onefifth; AjPFeattable *tabptr = NULL; AjPFeattabOut seq1out = NULL; AjPStr sajb = NULL; float flen1; float flen2; ajuint tui; embInit("polydot", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("sequences"); graph = ajAcdGetGraph("graph"); gap = ajAcdGetInt("gap"); boxit = ajAcdGetBoolean("boxit"); seq1out = ajAcdGetFeatout("outfeat"); dumpfeat = ajAcdGetToggle("dumpfeat"); sajb = ajStrNew(); embWordLength(wordlen); AJCNEW(lines,ajSeqsetGetSize(seqset)); AJCNEW(pts,ajSeqsetGetSize(seqset)); AJCNEW(tabptr,ajSeqsetGetSize(seqset)); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); total += ajSeqGetLen(seq1); } total +=(float)(gap*(ajSeqsetGetSize(seqset)-1)); xmargin = total*(float)0.15; ymargin = total*(float)0.15; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; i = 0; while(acceptableticks[i]*numbofticks < ajSeqsetGetLen(seqset)) i++; if(i<=13) tickgap = acceptableticks[i]; else tickgap = acceptableticks[13]; ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); ajGraphOpenWin(graph, (float)0.0-xmargin,(total+xmargin)*(float)1.35, (float)0.0-ymargin, total+ymargin); ajGraphicsSetCharscale((float)0.3); for(i=0;i<ajSeqsetGetSize(seqset);i++) { which = i; seq1 = ajSeqsetGetseqSeq(seqset, i); tui = ajSeqGetLen(seq1); flen1 = (float) tui; if(embWordGetTable(&seq1MatchTable, seq1)){ /* get table of words */ for(j=0;j<ajSeqsetGetSize(seqset);j++) { seq2 = ajSeqsetGetseqSeq(seqset, j); tui = ajSeqGetLen(seq2); flen2 = (float) tui; 
if(boxit) ajGraphicsDrawposRect(xstart,ystart, xstart+flen1, ystart+flen2); matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue); if(matchlist) polydot_plotMatches(matchlist); if(i<j && dumpfeat) embWordMatchListConvToFeat(matchlist,&tabptr[i], &tabptr[j],seq1, seq2); if(matchlist) /* free the match structures */ embWordMatchListDelete(&matchlist); if(j==0) { for(k=0.0;k<ajSeqGetLen(seq1);k+=tickgap) { ajGraphicsDrawposLine(xstart+k,ystart,xstart+k, ystart-ticklen); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtmid(xstart+k, ystart-(onefifth), ptr); } ajGraphicsDrawposTextAtmid( xstart+(flen1/(float)2.0), ystart-(3*onefifth), ajStrGetPtr(ajSeqsetGetseqNameS(seqset, i))); } if(i==0) { for(k=0.0;k<ajSeqGetLen(seq2);k+=tickgap) { ajGraphicsDrawposLine(xstart,ystart+k,xstart-ticklen, ystart+k); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtend(xstart-(onefifth), ystart+k, ptr); } ajGraphicsDrawposTextAtlineJustify( xstart-(3*onefifth), ystart+(flen2/(float)2.0), xstart-(3*onefifth),ystart+flen2, ajStrGetPtr(ajSeqsetGetseqNameS(seqset, j)),0.5); } ystart += flen2+(float)gap; } } embWordFreeTable(&seq1MatchTable); seq1MatchTable = NULL; xstart += flen1+(float)gap; ystart = 0.0; } ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth), "No. Length Lines Points Sequence"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); ajFmtPrintS(&sajb,"%3u %6d %5d %6d %s",i+1, ajSeqGetLen(seq1),lines[i], pts[i],ajSeqGetNameC(seq1)); ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth*(i+2)), ajStrGetPtr(sajb)); } if(dumpfeat && seq1out) { for(i=0;i<ajSeqsetGetSize(seqset);i++) { ajFeattableWrite(seq1out, tabptr[i]); ajFeattableDel(&tabptr[i]); } } ajGraphicsClose(); ajGraphxyDel(&graph); ajStrDel(&sajb); AJFREE(lines); AJFREE(pts); AJFREE(tabptr); ajSeqsetDel(&seqset); ajFeattabOutDel(&seq1out);; embExit(); return 0; }
void embConsCalc(const AjPSeqset seqset,const AjPMatrix cmpmatrix, ajint nseqs, ajint mlen,float fplural,float setcase, ajint identity, AjBool gaps, AjPStr *cons) { ajint i; ajint j; ajint k; ajint **matrix; ajint m1 = 0; ajint m2 = 0; ajint highindex; ajint matsize; ajint matchingmaxindex; ajint identicalmaxindex; float max; float contri = 0; float contrj = 0; float *identical; float *matching; AjPSeqCvt cvt = 0; AjPFloat score = NULL; const char **seqcharptr; char res; char nocon = '-'; void *freeptr; matrix = ajMatrixGetMatrix(cmpmatrix); cvt = ajMatrixGetCvt(cmpmatrix); /* return conversion table */ matsize = ajMatrixGetSize(cmpmatrix); AJCNEW(seqcharptr,nseqs); AJCNEW(identical,matsize); AJCNEW(matching,matsize); score = ajFloatNew(); if(ajSeqsetIsNuc(seqset)) /* set non-consensus character */ nocon = 'N'; else if ( ajSeqsetIsProt(seqset)) nocon = 'X'; for(i=0;i<nseqs;i++) /* get sequence as string */ seqcharptr[i] = ajSeqsetGetseqSeqC(seqset, i); for(k=0; k< mlen; k++) { res = nocon; for(i=0;i<matsize;i++) /* reset id's and +ve matches */ { identical[i] = 0.0; matching[i] = 0.0; } for(i=0;i<nseqs;i++) ajFloatPut(&score,i,0.); for(i=0;i<nseqs;i++) /* generate score for columns */ { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(m1 || gaps) identical[m1] += ajSeqsetGetseqWeight(seqset,i); for(j=i+1;j<nseqs;j++) { m2 = ajSeqcvtGetCodeK(cvt,seqcharptr[j][k]); if(m1 && m2) { contri = (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset,j) +ajFloatGet(score,i); contrj = (float)matrix[m1][m2]* ajSeqsetGetseqWeight(seqset,i) +ajFloatGet(score,j); ajFloatPut(&score,i,contri); ajFloatPut(&score,j,contrj); } } } highindex = -1; max = -(float)INT_MAX; for(i=0;i<nseqs;i++) if( ajFloatGet(score,i) > max || (ajFloatGet(score,i) == max && seqcharptr[highindex][k] == '-') ) { highindex = i; max = ajFloatGet(score,i); } for(i=0;i<nseqs;i++) /* find +ve matches in the column */ { m1 = ajSeqcvtGetCodeK(cvt, seqcharptr[i][k]); if(!matching[m1]) for(j=0;j<nseqs;j++) { /* // if( i 
!= j) // { // m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); // if(m1 && m2 && matrix[m1][m2] > 0) // matching[m1] += ajSeqsetGetseqWeight(seqset, j); // } */ m2 = ajSeqcvtGetCodeK(cvt, seqcharptr[j][k]); if(m1 && m2 && matrix[m1][m2] > 0) matching[m1] += ajSeqsetGetseqWeight(seqset, j); if(gaps && !m1 && !m2) matching[m1] += ajSeqsetGetseqWeight(seqset, j); } } matchingmaxindex = 0; /* get max matching and identical */ identicalmaxindex = 0; for(i=0;i<nseqs;i++) { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(identical[m1] > identical[identicalmaxindex]) identicalmaxindex = m1; } for(i=0;i<nseqs;i++) { m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[i][k]); if(matching[m1] > matching[matchingmaxindex]) matchingmaxindex = m1; else if(matching[m1] == matching[matchingmaxindex]) if(identical[m1] > identical[matchingmaxindex]) matchingmaxindex = m1; } /* plurality check */ m1 = ajSeqcvtGetCodeK(cvt,seqcharptr[highindex][k]); /* if(matching[m1] >= fplural && seqcharptr[highindex][k] != '-') res = seqcharptr[highindex][k];*/ if(matching[m1] >= fplural) res = seqcharptr[highindex][k]; if(matching[m1]<= setcase) res = tolower((int)res); if(identity) /* if just looking for id's */ { j = 0; for(i=0;i<nseqs;i++) if(matchingmaxindex == ajSeqcvtGetCodeK(cvt,seqcharptr[i][k])) j++; if(j<identity) res = nocon; } ajStrAppendK(cons,res); } freeptr = (void *) seqcharptr; AJFREE(freeptr); AJFREE(matching); AJFREE(identical); ajFloatDel(&score); return; }
void embPropCalcFragments(const char *s, ajint n, AjPList *l, AjPList *pa, AjBool unfavoured, AjBool overlap, AjBool allpartials, ajint *ncomp, ajint *npart, AjPStr *rname, AjBool nterm, AjBool cterm, AjBool dorag, EmbPPropMolwt const *mwdata, AjBool mono) { static const char *PROPENZReagent[]= { "Trypsin","Lys-C","Arg-C","Asp-N","V8-bicarb","V8-phosph", "Chymotrypsin","CNBr" }; static const char *PROPENZSite[]= { "KR","K","R","D","E","DE","FYWLM","M" }; static const char *PROPENZAminoCarboxyl[]= { "CC","C","C","N","C","CC","CCCCC","C" }; static const char *PROPENZUnfavoured[]= { "KRIFLP","P","P","","KREP","P","P","" }; ajint i; ajint j; ajint lim; ajint len; AjPList t; EmbPPropFrag fr; ajint *begsa = NULL; ajint *endsa = NULL; double molwt; double *molwtsa = NULL; AjBool *afrag = NULL; ajint mark; ajint bwp; ajint ewp; ajint *ival; ajint defcnt; ajint it; ajint st = 0; ajint mt = 0; ajint et = 0; ajStrAssignC(rname,PROPENZReagent[n]); defcnt = 0; len = (ajint) strlen(s); t = ajListNew(); /* Temporary list */ /* First get all potential cut points */ for(i=0;i<len;++i) { if(!strchr(PROPENZSite[n],s[i])) continue; if(len==i+1) continue; if(strchr(PROPENZUnfavoured[n],s[i+1]) && !unfavoured) continue; AJNEW0(ival); *ival = i; ajListPushAppend(t,(void *)ival); ++defcnt; } if(defcnt) { AJCNEW(begsa,(defcnt+1)); AJCNEW(endsa,(defcnt+1)); AJCNEW(molwtsa,(defcnt+1)); AJCNEW(afrag,(defcnt+1)); } for(i=0;i<defcnt;++i) /* Pop them into a temporary array */ { ajListPop(t,(void **)&ival); endsa[i] = *ival; AJFREE(ival); } mark = 0; for(i=0;i<defcnt;++i) /* Work out true starts, ends and molwts */ { bwp = mark; ewp = endsa[i]; if(strchr(PROPENZAminoCarboxyl[n],'N')) --ewp; molwt=embPropCalcMolwt(s,bwp,ewp,mwdata,mono); if(n==PROPENZCNBR) molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = ewp; molwtsa[i] = molwt; afrag[i] = ajFalse; mark = ewp+1; } if(defcnt) /* Special treatment for last fragment */ { molwt = embPropCalcMolwt(s,mark,len-1,mwdata,mono); if(n==PROPENZCNBR) 
molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = len-1; molwtsa[i] = molwt; afrag[i] = ajFalse; ++defcnt; } /* Push the hits */ for(i=0;i<defcnt;++i) { if(dorag) { st = begsa[i]; et = endsa[i]; for(it=st+RAG_MINPEPLEN-1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->start = begsa[i]; fr->end = endsa[i]; fr->molwt = molwtsa[i]; fr->isfrag = afrag[i]; ajListPush(*l,(void *) fr); if(dorag && nterm) for(it=st+1; it < et-RAG_MINPEPLEN+2; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(!dorag) ajListSort(*l, &propFragCompare); *ncomp = defcnt; /* Now deal with overlaps */ *npart = 0; lim = defcnt -1; if(overlap && !allpartials) { for(i=0;i<lim;++i) { if(dorag) { st = begsa[i]; mt = endsa[i]; et = endsa[i+1]; if(cterm) for(it=mt+1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[i+1],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[i+1]; ajListPush(*pa,(void *)fr); ++(*npart); if(dorag && nterm) for(it=st+1; it<mt; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(allpartials) { lim = defcnt; for(i=0;i<lim;++i) for(j=i+1;j<lim;++j) { 
AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[j],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[j]; ajListPush(*pa,(void *)fr); ++(*npart); } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(defcnt) { AJFREE(molwtsa); AJFREE(endsa); AJFREE(begsa); AJFREE(afrag); } ajListFree(&t); return; }
/* @funcstatic newcoils_read_matrix *******************************************
**
** Reads the matrix and stores in a hept_pref structure
**
** Lines starting with '%' are ignored. Lines starting with "uw " or "w "
** add a window parameter set (w = 0 or 1, then win, m_cc, sd_cc, m_g,
** sd_g and sc); reaching nine sets is fatal. Lines starting with an
** upper-case letter hold the seven heptad preferences of that residue;
** values that are not positive are stored as -1 (missing), and the
** smallest positive value seen is kept in 'smallest'. A second line for
** the same residue, and any other line, only produces a warning.
**
** Scan targets are cleared before each line is parsed, so a line with too
** few values gives zero (window parameters) or missing (preferences)
** entries rather than values left over from an earlier line.
**
** @param [u] inf [AjPFile] matrix input file
** @return [struct hept_pref*] Matrix data for heptad preference
******************************************************************************/

static struct hept_pref* newcoils_read_matrix(AjPFile inf)
{
    ajint i;
    ajint j;
    ajint pt;
    ajint aa_len;
    ajint win;

    float m_g;
    float sd_g;
    float m_cc;
    float sd_cc;
    float sc;
    float hept[NCHEPTAD];

    AjPStr buff;
    const char *pbuff;

    struct hept_pref *h;

    buff = ajStrNew();

    aa_len = strlen(NCAAs);

    AJNEW(h);

    /* One row per NCAAs entry, every preference initially missing (-1) */
    AJCNEW(h->m,aa_len);

    for(i=0; i<aa_len; ++i)
    {
        AJCNEW(h->m[i],NCHEPTAD);

        for(j=0; j<NCHEPTAD; ++j)
            h->m[i][j] = -1;
    }

    AJNEW(h->f);

    h->n = 0;
    h->smallest = 1.0;

    while(ajReadlineTrim(inf,&buff))
    {
        pbuff = ajStrGetPtr(buff);

        if(*pbuff == '%')
            continue;

        if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0))
        {
            /* Window parameter set */
            i = h->n;

            if(strncmp(pbuff,"uw ",3)==0)
                h->f[i].w = 0;
            else
                h->f[i].w = 1;

            win   = 0;
            m_cc  = 0.0;
            sd_cc = 0.0;
            m_g   = 0.0;
            sd_g  = 0.0;
            sc    = 0.0;

            ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc,
                       &sd_cc,&m_g,&sd_g,&sc);

            h->f[i].win   = win;
            h->f[i].m_cc  = (float)m_cc;
            h->f[i].sd_cc = (float)sd_cc;
            h->f[i].m_g   = (float)m_g;
            h->f[i].sd_g  = (float)sd_g;
            h->f[i].sc    = (float)sc;
            h->n++;

            /* Keep one free slot ready for the next parameter set */
            AJCRESIZE(h->f,(h->n)+1);

            if((h->n)>=9)
                ajFatal("Too many window parms in matrix file\n");
        }
        else if(*pbuff>='A' && *pbuff<='Z')
        {
            /* AA data */
            pt = (int)(pbuff[0]-'A');

            if(h->m[pt][0]==-1)
            {
                for(i=0; i<NCHEPTAD; ++i)
                    hept[i] = 0.0;

                ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0],
                           &hept[1],&hept[2],&hept[3],&hept[4],
                           &hept[5],&hept[6]);

                for(i=0; i<NCHEPTAD; ++i)
                {
                    h->m[pt][i] = (float)hept[i];

                    if(h->m[pt][i]>0)
                    {
                        if(h->m[pt][i]<h->smallest)
                            h->smallest = h->m[pt][i];
                    }
                    else
                        h->m[pt][i]=-1; /* Don't permit zero values */
                }
            }
            else
                ajWarn("multiple entries for AA %c in matrix file\n",
                       *pbuff);
        }
        else
        {
            ajWarn("strange characters in matrix file\n");
            ajWarn("Ignoring line: %S\n",buff);
        }
    }

    ajStrDel(&buff);

    return h;
}
int main(int argc, char **argv) { AjPAlign align; AjPSeqall seqall; AjPSeq a; AjPSeq b; AjPStr alga; AjPStr algb; AjPStr ss; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; float *path; ajint *compass; float* ix; float* iy; float* m; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; float endgapopen; float endgapextend; ajulong maxarr = 1000; /* arbitrary. realloc'd if needed */ ajulong len; float score; AjBool dobrief = ajTrue; AjBool endweight = ajFalse; /* whether end gap penalties should be applied */ float id = 0.; float sim = 0.; float idx = 0.; float simx = 0.; AjPStr tmpstr = NULL; size_t stlen; embInit("needle", argc, argv); matrix = ajAcdGetMatrixf("datafile"); a = ajAcdGetSeq("asequence"); ajSeqTrim(a); seqall = ajAcdGetSeqall("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); endgapopen = ajAcdGetFloat("endopen"); endgapextend = ajAcdGetFloat("endextend"); dobrief = ajAcdGetBoolean("brief"); endweight = ajAcdGetBoolean("endweight"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW(path, maxarr); AJCNEW(compass, maxarr); AJCNEW(m, maxarr); AJCNEW(ix, maxarr); AJCNEW(iy, maxarr); alga = ajStrNew(); algb = ajStrNew(); ss = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); lena = ajSeqGetLen(a); while(ajSeqallNext(seqall,&b)) { ajSeqTrim(b); lenb = ajSeqGetLen(b); if(lenb > (ULONG_MAX/(ajulong)(lena+1))) ajFatal("Sequences too big. Try 'stretcher' or 'supermatcher'"); len = lena*lenb; if(len>maxarr) { stlen = (size_t) len; AJCRESIZETRY(path,stlen); if(!path) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(compass,stlen); if(!compass) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(m,stlen); if(!m) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(ix,stlen); if(!ix) ajDie("Sequences too big. 
Try 'stretcher'"); AJCRESIZETRY(iy,stlen); if(!iy) ajDie("Sequences too big. Try 'stretcher'"); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&alga,""); ajStrAssignC(&algb,""); score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb, gapopen, gapextend, endgapopen, endgapextend, &start1, &start2, path, sub, cvt, m, ix, iy, compass, ajTrue, endweight); embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb, lena, lenb, &start1, &start2, compass); embAlignReportGlobal(align, a, b, alga, algb, start1, start2, gapopen, gapextend, score, matrix, ajSeqGetOffset(a), ajSeqGetOffset(b)); if(!dobrief) { embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx, &simx); ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n", id); ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n", sim); ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n", idx); ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%", simx); ajAlignSetSubHeaderApp(align, tmpstr); } ajAlignWrite(align); ajAlignReset(align); } ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); ajSeqDel(&a); ajSeqDel(&b); AJFREE(compass); AJFREE(path); AJFREE(ix); AJFREE(iy); AJFREE(m); ajStrDel(&alga); ajStrDel(&algb); ajStrDel(&ss); ajStrDel(&tmpstr); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPFile libr=NULL; AjPStr idformat = NULL; EmbPEntry entry; ajuint idtype = 0; ajuint idCount = 0; ajuint idDone; AjPList listInputFiles = NULL; void ** inputFiles = NULL; ajuint nfiles; ajuint ifile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; AjPRegexp regIdExp = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i; embInit("dbifasta", argc, argv); idformat = ajAcdGetListSingle("idformat"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint)maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); 
} } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); regIdExp = dbifasta_getExpr(idformat, &idtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listInputFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listInputFiles, &ajStrVcmp); nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) inputFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ for(ifile=0; ifile < nfiles; ifile++) { ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]); embDbiFlatOpenlib(curfilename, &libr); ajFilenameTrimPath(&curfilename); if(ajStrGetLen(curfilename) >= maxfilelen) maxfilelen = ajStrGetLen(curfilename) + 1; ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%F' ...\n", libr); ajStrAssignS(&divfiles[ifile], curfilename); if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, ifile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbifasta_NextFlatEntry(libr, ifile, regIdExp, idtype, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ embDbiMemEntry(idlist, fieldList, nfields, entry, ifile); entry = NULL; } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); AJFREE(entry); } else { embDbiEntryDel(&dbifastaGEntry); } embDbiLogFile(logfile, curfilename, idCountFile, fields, 
countField, nfields); } /* write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajStrDel(&idformat); ajStrDelarray(&fields); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&tmpfname); ajFileClose(&libr); ajFileClose(&logfile); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], &embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); AJFREE(maxFieldLen); AJFREE(countField); AJFREE(fieldTot); 
for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(inputFiles); embDbiEntryDel(&dbifastaGEntry); ajStrDel(&dbifastaGRline); ajStrDel(&dbifastaGTmpId); if(dbifastaGFdl) { for(i=0; i < nfields; i++) ajListFree(&dbifastaGFdl[i]); AJFREE(dbifastaGFdl); } ajListMap(idlist, &embDbiEntryDelMap, NULL); ajListFree(&idlist); ajListstrFreeData(&listInputFiles); AJFREE(entryIds); ajRegFree(&dbifastaGIdexp); ajRegFree(&dbifastaGWrdexp); ajRegFree(®IdExp); ajStrDel(&dbifastaGTmpAc); ajStrDel(&dbifastaGTmpSv); ajStrDel(&dbifastaGTmpGi); ajStrDel(&dbifastaGTmpDb); ajStrDel(&dbifastaGTmpDes); ajStrDel(&dbifastaGTmpFd); ajStrDel(&curfilename); embExit(); return 0; }
static EmbPEntry dbifasta_NextFlatEntry(AjPFile libr, ajint ifile, AjPRegexp idexp, ajuint type, AjBool systemsort, AjPStr const * fields, ajint* maxFieldLen, ajuint* maxidlen, ajuint* countfield, AjPFile elistfile, AjPFile* alistfile) { ajint ir; ajint is = 0; char* token; ajint i; static ajint nfields; ajint ifield; if(!dbifastaGFdl) { nfields = 0; while(fields[nfields]) nfields++; if(nfields) AJCNEW(dbifastaGFdl, nfields); for(i=0; i < nfields; i++) { dbifastaGFdl[i] = ajListNew(); } } if(!dbifastaGEntry || !systemsort) dbifastaGEntry = embDbiEntryNew(nfields); if(!dbifasta_ParseFasta(libr, &ir, maxFieldLen, countfield, idexp, type, alistfile, systemsort, fields)) return NULL; /* id to dbifastaGEntry->entry */ if(ajStrGetLen(dbifastaGTmpId) > *maxidlen) *maxidlen = ajStrGetLen(dbifastaGTmpId); if(systemsort) ajFmtPrintF(elistfile, "%S %d %d %d\n", dbifastaGTmpId, ir, is, ifile+1); else { dbifastaGEntry->entry = ajCharNewS(dbifastaGTmpId); dbifastaGEntry->rpos = ir; dbifastaGEntry->spos = is; dbifastaGEntry->filenum = ifile+1; /* field tokens as list, then move to ret->field */ for(ifield=0; ifield < nfields; ifield++) { dbifastaGEntry->nfield[ifield] = (ajuint) ajListGetLength(dbifastaGFdl[ifield]); if(dbifastaGEntry->nfield[ifield]) { AJCNEW(dbifastaGEntry->field[ifield], dbifastaGEntry->nfield[ifield]); i = 0; while(ajListPop(dbifastaGFdl[ifield], (void**) &token)) dbifastaGEntry->field[ifield][i++] = token; } else dbifastaGEntry->field[ifield] = NULL; } } return dbifastaGEntry; }
int main(int argc, char **argv) { AjPFile infile = NULL; AjPFile outfile = NULL; AjPStr line; AjPGraph graphLB = NULL; AjPGraphdata xygraph = NULL; AjPGraphdata xygraph2 = NULL; AjBool doplot; ajint N=0; float *xdata = NULL; float *ydata = NULL; float *V = NULL; float *S = NULL; float a; float b; float upperXlimit; float upperYlimit; float A; float B; float C; float D; float xmin; float xmax; float ymin; float ymax; float xmin2; float xmax2; float ymin2; float ymax2; float Vmax; float Km; float cutx; float cuty; float amin = 0.; float amax = 0.; float bmin = 0.; float bmax = 0.; embInit("findkm", argc, argv); infile = ajAcdGetInfile("infile"); outfile = ajAcdGetOutfile ("outfile"); doplot = ajAcdGetBoolean("plot"); graphLB = ajAcdGetGraphxy("graphLB"); line = ajStrNew(); /* Determine N by reading infile */ while(ajReadlineTrim(infile, &line)) if(ajStrGetLen(line) >0) N++; /* only allocate memory to the arrays */ AJCNEW(xdata, N); AJCNEW(ydata, N); AJCNEW(S, N); AJCNEW(V, N); ajFileSeek(infile, 0L, 0); N=0; while(ajReadlineTrim(infile, &line)) { if(ajStrGetLen(line) > 0) { sscanf(ajStrGetPtr(line),"%f %f",&S[N],&V[N]); if(S[N] > 0.0 && V[N] > 0.0) { xdata[N] = S[N]; ydata[N] = S[N]/V[N]; N++; } } } /* find the max and min values for the graph parameters*/ xmin = (float)0.5*findkm_findmin(xdata, N); xmax = (float)1.5*findkm_findmax(xdata, N); ymin = (float)0.5*findkm_findmin(ydata, N); ymax = (float)1.5*findkm_findmax(ydata, N); xmin2 = (float)0.5*findkm_findmin(S, N); xmax2 = (float)1.5*findkm_findmax(S, N); ymin2 = (float)0.5*findkm_findmin(V, N); ymax2 = (float)1.5*findkm_findmax(V, N); /* ** In case the casted ints turn out to be same number on the axis, ** make the max number larger than the min so graph can be seen. 
*/ if((ajint)xmax == (ajint)xmin) ++xmax; if((ajint)ymax == (ajint)ymin) ++ymax; if((ajint)xmax2 == (ajint)xmin2) ++xmax2; if((ajint)ymax2 == (ajint)ymin2) ++ymax2; /* ** Gaussian Elimination for Best-fit curve plotting and ** calculating Km and Vmax */ A = findkm_summation(xdata, N); B = findkm_summation(ydata, N); C = findkm_multisum(xdata, ydata, N); D = findkm_multisum(xdata, xdata, N); /* ** To find the best fit line, Least Squares Fit: y =ax +b; ** Two Simultaneous equations, REARRANGE FOR b ** ** findkm_summation(ydata, N) - findkm_summation(xdata,N)*a - N*b =0; ** b = (findkm_summation(ydata,N) - findkm_summation(xdata,N)*a) / N; ** b = (B - A*a)/ N; ** ** C - D*a - A*((B - A*a)/ N) =0; ** C - D*a - A*B/N + A*A*a/N =0; ** C - A*B/N = D*a - A*A*a/N; */ /* REARRANGE FOR a */ a = (N*C - A*B)/ (N*D - A*A); b = (B - A*a)/ N; /* ** Equation of Line - Lineweaver burk eqn ** 1/V = (Km/Vmax)*(1/S) + 1/Vmax; */ Vmax = 1/a; Km = b/a; cutx = -1/Km; cuty = Km/Vmax; /* set limits for last point on graph */ upperXlimit = findkm_findmax(xdata,N)+3; upperYlimit = (upperXlimit)*a + b; ajFmtPrintF(outfile, "---Hanes Woolf Plot Calculations---\n"); ajFmtPrintF(outfile, "Slope of best fit line is a = %.2f\n", a); ajFmtPrintF(outfile,"Coefficient in Eqn of line y = ma +b is b " "= %.2f\n", b); ajFmtPrintF(outfile, "Where line cuts x axis = (%.2f, 0)\n", cutx); ajFmtPrintF(outfile, "Where line cuts y axis = (0, %.2f)\n", cuty); ajFmtPrintF(outfile, "Limit-point of graph for plot = (%.2f, %.2f)\n\n", upperXlimit, upperYlimit); ajFmtPrintF(outfile, "Vmax = %.2f, Km = %f\n",Vmax, Km); /* draw graphs */ if(doplot) { xygraph = ajGraphdataNewI(N); ajGraphdataAddXY(xygraph, S, V); ajGraphDataAdd(graphLB, xygraph); ajGraphdataSetTitleC(xygraph, "Michaelis Menten Plot"); ajGraphdataSetXlabelC(xygraph, "[S]"); ajGraphdataSetYlabelC(xygraph, "V"); ajGraphxySetXstartF(graphLB, 0.0); ajGraphxySetXendF(graphLB, xmax2); ajGraphxySetYstartF(graphLB, 0.0); ajGraphxySetYendF(graphLB, ymax2); 
ajGraphxySetXrangeII(graphLB, (ajint)0.0, (ajint)xmax2); ajGraphxySetYrangeII(graphLB, (ajint)0.0, (ajint)ymax2); ajGraphdataAddposLine(xygraph, 0.0, 0.0, S[0], V[0], (ajint)BLACK); ajGraphxyShowPointsCircle(graphLB, ajTrue); ajGraphdataSetMinmax(xygraph,0.0,xmax2,0.0,ymax2); ajGraphicsCalcRange(S,N,&amin,&amax); ajGraphicsCalcRange(V,N,&bmin,&bmax); ajGraphdataSetTruescale(xygraph,amin,amax,bmin,bmax); ajGraphdataSetTypeC(xygraph,"2D Plot Float"); xygraph2 = ajGraphdataNewI(N); ajGraphdataAddXY(xygraph2, xdata, ydata); ajGraphDataAdd(graphLB, xygraph2); ajGraphdataSetTitleC(xygraph2, "Hanes Woolf Plot"); ajGraphdataSetXlabelC(xygraph2, "[S]"); ajGraphdataSetYlabelC(xygraph2, "[S]/V"); ajGraphxySetXstartF(graphLB, cutx); ajGraphxySetXendF(graphLB, upperXlimit); ajGraphxySetYstartF(graphLB, 0.0); ajGraphxySetYendF(graphLB, upperYlimit); ajGraphxySetXrangeII(graphLB, (ajint)cutx, (ajint)upperXlimit); ajGraphxySetYrangeII(graphLB, (ajint)0.0, (ajint)upperYlimit); ajGraphxyShowPointsCircle(graphLB, ajTrue); ajGraphdataSetMinmax(xygraph2, cutx,upperXlimit,0.0,upperYlimit); ajGraphicsCalcRange(xdata,N,&amin,&amax); ajGraphicsCalcRange(ydata,N,&bmin,&bmax); ajGraphdataSetTruescale(xygraph2,amin,amax,bmin,bmax); ajGraphdataSetTypeC(xygraph2,"2D Plot"); ajGraphSetTitleC(graphLB,"FindKm"); ajGraphxySetflagOverlay(graphLB,ajFalse); ajGraphxyDisplay(graphLB, ajTrue); } AJFREE(xdata); AJFREE(ydata); AJFREE(S); AJFREE(V); ajFileClose(&infile); ajFileClose(&outfile); ajGraphxyDel(&graphLB); ajStrDel(&line); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPFile inf = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr name = NULL; AjPStr mname = NULL; AjPStr tname = NULL; AjPStr pname = NULL; AjPStr line = NULL; AjPStr cons = NULL; AjPStr m = NULL; AjPStr n = NULL; AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */ ajint type; ajint begin; ajint end; ajulong len; ajint i; ajint j; float **fmatrix=NULL; ajint mlen; float maxfs; ajint thresh; float gapopen; float gapextend; float opencoeff; float extendcoeff; const char *p; ajulong maxarr = 1000; ajulong alen; float *path; ajint *compass; size_t stlen; embInit("prophet", argc, argv); seqall = ajAcdGetSeqall("sequence"); inf = ajAcdGetInfile("infile"); opencoeff = ajAcdGetFloat("gapopen"); extendcoeff = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); /*JISON replacing outfile */ opencoeff = ajRoundFloat(opencoeff, 8); extendcoeff = ajRoundFloat(extendcoeff, 8); substr = ajStrNew(); name = ajStrNew(); mname = ajStrNew(); tname = ajStrNew(); line = ajStrNew(); m = ajStrNewC(""); n = ajStrNewC(""); type = prophet_getType(inf,&tname); if(!type) ajFatal("Unrecognised profile/matrix file format"); prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh, &maxfs, &cons); ajAlignSetMatrixName(align, mname); AJCNEW(fmatrix, mlen); for(i=0;i<mlen;++i) { AJCNEW(fmatrix[i], AZ); if(!ajReadlineTrim(inf,&line)) ajFatal("Missing matrix line"); p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t"); for(j=0;j<AZ;++j) { sscanf(p,"%f",&fmatrix[i][j]); p = ajSysFuncStrtok(NULL," \t"); } } AJCNEW(path, maxarr); AJCNEW(compass, maxarr); while(ajSeqallNext(seqall, &seq)) { begin = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignC(&name,ajSeqGetNameC(seq)); strand = ajSeqGetSeqCopyS(seq); ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1); len = ajStrGetLen(substr); if(len > (ULONG_MAX/(ajulong)(mlen+1))) ajFatal("Sequences too big. 
Try 'supermatcher'"); alen = len*mlen; if(alen>maxarr) { stlen = (size_t) alen; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=alen; } ajStrAssignC(&m,""); ajStrAssignC(&n,""); /* JISON used to be prophet_scan_profile(substr,pname,name,mlen,fmatrix, outf,cons,opencoeff, extendcoeff,path,compass,&m,&n,len); */ /* JISON new call and reset align */ prophet_scan_profile(substr,name,pname,mlen,fmatrix, align,cons,opencoeff, extendcoeff,path,compass,&m,&n,(ajint)len); ajAlignReset(align); ajStrDel(&strand); } for(i=0;i<mlen;++i) AJFREE (fmatrix[i]); AJFREE (fmatrix); AJFREE(path); AJFREE(compass); ajStrDel(&line); ajStrDel(&cons); ajStrDel(&name); ajStrDel(&pname); ajStrDel(&mname); ajStrDel(&tname); ajStrDel(&substr); ajStrDel(&m); ajStrDel(&n); ajSeqDel(&seq); ajFileClose(&inf); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); embExit(); return 0; }