void embPropCalcFragments(const char *s, ajint n, AjPList *l, AjPList *pa, AjBool unfavoured, AjBool overlap, AjBool allpartials, ajint *ncomp, ajint *npart, AjPStr *rname, AjBool nterm, AjBool cterm, AjBool dorag, EmbPPropMolwt const *mwdata, AjBool mono) { static const char *PROPENZReagent[]= { "Trypsin","Lys-C","Arg-C","Asp-N","V8-bicarb","V8-phosph", "Chymotrypsin","CNBr" }; static const char *PROPENZSite[]= { "KR","K","R","D","E","DE","FYWLM","M" }; static const char *PROPENZAminoCarboxyl[]= { "CC","C","C","N","C","CC","CCCCC","C" }; static const char *PROPENZUnfavoured[]= { "KRIFLP","P","P","","KREP","P","P","" }; ajint i; ajint j; ajint lim; ajint len; AjPList t; EmbPPropFrag fr; ajint *begsa = NULL; ajint *endsa = NULL; double molwt; double *molwtsa = NULL; AjBool *afrag = NULL; ajint mark; ajint bwp; ajint ewp; ajint *ival; ajint defcnt; ajint it; ajint st = 0; ajint mt = 0; ajint et = 0; ajStrAssignC(rname,PROPENZReagent[n]); defcnt = 0; len = (ajint) strlen(s); t = ajListNew(); /* Temporary list */ /* First get all potential cut points */ for(i=0;i<len;++i) { if(!strchr(PROPENZSite[n],s[i])) continue; if(len==i+1) continue; if(strchr(PROPENZUnfavoured[n],s[i+1]) && !unfavoured) continue; AJNEW0(ival); *ival = i; ajListPushAppend(t,(void *)ival); ++defcnt; } if(defcnt) { AJCNEW(begsa,(defcnt+1)); AJCNEW(endsa,(defcnt+1)); AJCNEW(molwtsa,(defcnt+1)); AJCNEW(afrag,(defcnt+1)); } for(i=0;i<defcnt;++i) /* Pop them into a temporary array */ { ajListPop(t,(void **)&ival); endsa[i] = *ival; AJFREE(ival); } mark = 0; for(i=0;i<defcnt;++i) /* Work out true starts, ends and molwts */ { bwp = mark; ewp = endsa[i]; if(strchr(PROPENZAminoCarboxyl[n],'N')) --ewp; molwt=embPropCalcMolwt(s,bwp,ewp,mwdata,mono); if(n==PROPENZCNBR) molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = ewp; molwtsa[i] = molwt; afrag[i] = ajFalse; mark = ewp+1; } if(defcnt) /* Special treatment for last fragment */ { molwt = embPropCalcMolwt(s,mark,len-1,mwdata,mono); if(n==PROPENZCNBR) molwt -= (17.045 + 31.095); begsa[i] = mark; endsa[i] = len-1; molwtsa[i] = molwt; afrag[i] = ajFalse; ++defcnt; } /* Push the hits */ for(i=0;i<defcnt;++i) { if(dorag) { st = begsa[i]; et = endsa[i]; for(it=st+RAG_MINPEPLEN-1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->start = begsa[i]; fr->end = endsa[i]; fr->molwt = molwtsa[i]; fr->isfrag = afrag[i]; ajListPush(*l,(void *) fr); if(dorag && nterm) for(it=st+1; it < et-RAG_MINPEPLEN+2; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(!dorag) ajListSort(*l, &propFragCompare); *ncomp = defcnt; /* Now deal with overlaps */ *npart = 0; lim = defcnt -1; if(overlap && !allpartials) { for(i=0;i<lim;++i) { if(dorag) { st = begsa[i]; mt = endsa[i]; et = endsa[i+1]; if(cterm) for(it=mt+1; it < et; ++it) { AJNEW0(fr); fr->start = st; fr->end = it; fr->molwt = embPropCalcMolwt(s,st,it,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[i+1],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[i+1]; ajListPush(*pa,(void *)fr); ++(*npart); if(dorag && nterm) for(it=st+1; it<mt; ++it) { AJNEW0(fr); fr->start = it; fr->end = et; fr->molwt = embPropCalcMolwt(s,it,et,mwdata,mono); if(n == PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->isfrag = ajTrue; ajListPush(*l,(void *)fr); } } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(allpartials) { lim = defcnt; for(i=0;i<lim;++i) for(j=i+1;j<lim;++j) { AJNEW0(fr); fr->isfrag = ajTrue; fr->molwt = embPropCalcMolwt(s,begsa[i],endsa[j],mwdata,mono); if(n==PROPENZCNBR) fr->molwt -= (17.045 + 31.095); fr->start = begsa[i]; fr->end = endsa[j]; ajListPush(*pa,(void *)fr); ++(*npart); } if(*npart) /* Remove complete sequence */ { --(*npart); ajListPop(*pa,(void **)&fr); } if(!dorag) ajListSort(*pa, &propFragCompare); } if(defcnt) { AJFREE(molwtsa); AJFREE(endsa); AJFREE(begsa); AJFREE(afrag); } ajListFree(&t); return; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq a; AjPStr substr; AjPStr rname; ajint be; ajint en; ajint len; AjBool unfavoured; AjBool overlap; AjBool allpartials; AjPStr menu; AjPStr rag; ajint n = 0; ajint r = 0; AjPFile outf = NULL; AjPReport report = NULL; AjPFeattable TabRpt = NULL; AjPStr tmpStr = NULL; AjPList l; AjPList pa; AjPFile mfptr = NULL; AjBool nterm = ajFalse; AjBool cterm = ajFalse; AjBool dorag = ajFalse; ajint ncomp; ajint npart; EmbPPropMolwt *mwdata = NULL; AjBool mono; embInit("digest", argc, argv); seqall = ajAcdGetSeqall("seqall"); menu = ajAcdGetListSingle("menu"); dorag = ajAcdGetBoolean("ragging"); rag = ajAcdGetListSingle("termini"); unfavoured = ajAcdGetBoolean("unfavoured"); overlap = ajAcdGetBoolean("overlap"); allpartials = ajAcdGetBoolean("allpartials"); report = ajAcdGetReport("outfile"); mfptr = ajAcdGetDatafile("mwdata"); mono = ajAcdGetBoolean("mono"); /* obsolete. Can be uncommented in acd file and here to reuse */ /* outf = ajAcdGetOutfile("originalfile"); */ ajStrToInt(menu, &n); --n; ajStrToInt(rag, &r); if(r==2 || r==4) nterm = ajTrue; if(r==3 || r==4) cterm = ajTrue; mwdata = embPropEmolwtRead(mfptr); while(ajSeqallNext(seqall, &a)) { substr = ajStrNew(); be = ajSeqGetBegin(a); en = ajSeqGetEnd(a); ajStrAssignSubC(&substr,ajSeqGetSeqC(a),be-1,en-1); ajStrFmtUpper(&substr); len = en-be+1; l = ajListNew(); pa = ajListNew(); rname = ajStrNew(); TabRpt = ajFeattableNewSeq(a); embPropCalcFragments(ajStrGetPtr(substr),n,&l,&pa, unfavoured,overlap, allpartials,&ncomp,&npart,&rname, nterm, cterm, dorag, mwdata, mono); if(outf) ajFmtPrintF(outf,"DIGEST of %s from %d to %d Molwt=%10.3f\n\n", ajSeqGetNameC(a),be,en, embPropCalcMolwt(ajSeqGetSeqC(a),0,len-1,mwdata,mono)); if(!ncomp) { if(outf) ajFmtPrintF(outf, "Is not proteolytically digested using %s\n", ajStrGetPtr(rname)); } else { if(outf) { ajFmtPrintF(outf,"Complete digestion with %s " "yields %d fragments:\n", ajStrGetPtr(rname),ncomp); digest_print_hits(l,outf,be,ajStrGetPtr(substr)); } ajFmtPrintS(&tmpStr, "Complete digestion with %S yields %d fragments", rname,ncomp); ajReportSetHeaderS(report, tmpStr); digest_report_hits(TabRpt,l,be, ajStrGetPtr(substr)); ajReportWrite(report, TabRpt, a); ajFeattableClear(TabRpt); } if(overlap && !allpartials && npart) { if(outf) { ajFmtPrintF(outf, "\n\nPartial digest with %s yields %d extras.\n", ajStrGetPtr(rname),npart); ajFmtPrintF(outf,"Only overlapping partials shown:\n"); digest_print_hits(pa,outf,be,ajStrGetPtr(substr)); } ajFmtPrintS(&tmpStr, "\n\nPartial digest with %S yields %d extras.\n", rname,npart); ajFmtPrintAppS(&tmpStr,"Only overlapping partials shown:\n"); ajReportSetHeaderS(report, tmpStr); digest_report_hits(TabRpt, pa,be,ajStrGetPtr(substr)); ajReportWrite(report, TabRpt, a); ajFeattableClear(TabRpt); } if(allpartials && npart) { if(outf) { ajFmtPrintF(outf, "\n\nPartial digest with %s yields %d extras.\n", ajStrGetPtr(rname),npart); ajFmtPrintF(outf,"All partials shown:\n"); digest_print_hits(pa,outf,be,ajStrGetPtr(substr)); } ajFmtPrintS(&tmpStr, "\n\nPartial digest with %S yields %d extras.\n", rname,npart); ajFmtPrintAppS(&tmpStr,"All partials shown:\n"); ajReportSetHeaderS(report, tmpStr); digest_report_hits(TabRpt, pa,be, ajStrGetPtr(substr)); ajReportWrite(report, TabRpt, a); ajFeattableClear(TabRpt); } } embPropMolwtDel(&mwdata); ajReportDel(&report); ajFeattableDel(&TabRpt); ajSeqDel(&a); ajSeqallDel(&seqall); ajStrDel(&rname); ajStrDel(&substr); ajListFree(&pa); ajListFree(&l); ajStrDel(&menu); ajStrDel(&rag); if(outf) ajFileClose(&outf); ajFileClose(&mfptr); ajStrDel(&tmpStr); embExit(); return 0; }