/*
 * Sum the extinction coefficients of the residues in s[start..end].
 *
 * Each character is upper-cased and mapped through ajBasecodeToInt() to
 * select an entry of aadata; that entry's extcoeff field is added to the
 * running total.
 *
 * 'C' residues are handled specially:
 *   - cystine false: every 'C' is skipped;
 *   - cystine true:  'C' residues are counted and only every second one
 *                    (the 2nd, 4th, ...) contributes its extcoeff.
 *
 * s        character sequence (read only)
 * start    index of the first character to include
 * end      index of the last character to include (inclusive)
 * cystine  selects how 'C' residues are treated (see above)
 * aadata   table of property records indexed by ajBasecodeToInt()
 *
 * Returns the accumulated total. Nothing is added when end < start.
 */
double embPropCalcMolextcoeff(const char *s, ajint start, ajint end,
                              AjBool cystine, EmbPPropAmino const *aadata)
{
    const char *cursor = s + start;
    ajint remaining = end - start + 1;
    ajuint cys_seen = 0;
    double total = 0.0;

    for(; remaining > 0; --remaining, ++cursor)
    {
        char residue = toupper((ajint)*cursor);

        if(residue == 'C')
        {
            if(!cystine)
                continue;

            /* odd-numbered C's are skipped; even-numbered ones count */
            ++cys_seen;

            if(cys_seen % 2)
                continue;
        }

        total += (double) aadata[ajBasecodeToInt(residue)]->extcoeff;
    }

    return total;
}
/*
 * Sum per-residue molecular weights over s[start..end] and add the
 * supplied terminal masses.
 *
 * Each character is upper-cased and converted with ajBasecodeToInt() to
 * an index into mwdata. The record's 'mono' field is used when mono is
 * true, otherwise its 'average' field.
 *
 * s       character sequence (read only)
 * start   index of the first character to include
 * end     index of the last character to include (inclusive)
 * mwdata  table of molecular weight records indexed by ajBasecodeToInt()
 * mono    true to use the 'mono' weights, false for 'average'
 * nmass   value added to the total (N-terminal mass, going by the name)
 * cmass   value added to the total (C-terminal mass, going by the name)
 *
 * Returns residue sum + nmass + cmass.
 */
double embPropCalcMolwtMod(const char *s, ajint start, ajint end,
                           EmbPPropMolwt const *mwdata, AjBool mono,
                           double nmass, double cmass)
{
    const char *cursor = s + start;
    ajint remaining = end - start + 1;
    double total = 0.0;

    for(; remaining > 0; --remaining, ++cursor)
    {
        ajint code = ajBasecodeToInt(toupper((ajint)*cursor));

        if(mono)
            total += mwdata[code]->mono;
        else
            total += mwdata[code]->average;
    }

    return total + nmass + cmass;
}
void embIepPkRead(double *pK) { AjPFile inf = NULL; AjPStr line; const char *p; double amino = 8.6; double carboxyl = 3.6; char ch; ajint i; inf = ajDatafileNewInNameC(PKFILE); if(!inf) ajFatal("%s file not found",PKFILE); for(i=0;i<EMBIEPSIZE;++i) pK[i]=0.0; line = ajStrNew(); while(ajReadline(inf,&line)) { p = ajStrGetPtr(line); if(*p=='#' || *p=='!' || *p=='\n' || *p=='\r') continue; if(ajStrPrefixCaseC(line,"Amino")) { p = ajSysFuncStrtok(p," \t\n\r"); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p,"%lf",&amino); continue; } if(ajStrPrefixCaseC(line,"Carboxyl")) { p = ajSysFuncStrtok(p," \t\n\r"); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p,"%lf",&carboxyl); continue; } p = ajSysFuncStrtok(p," \t\n\r"); ch = ajSysCastItoc(toupper((ajint)*p)); p = ajSysFuncStrtok(NULL," \t\n\r"); sscanf(p,"%lf",&pK[ajBasecodeToInt(ch)]); } pK[EMBIEPAMINO] = amino; pK[EMBIEPCARBOXYL] = carboxyl; ajStrDel(&line); ajFileClose(&inf); return; }
int main(int argc, char **argv) { /* Variable Declarations */ AjPStr code = NULL; AjPFile mfptr = NULL; AjPFile wfptr = NULL; AjPFile outf = NULL; EmbPPropMolwt *mwdata; EmbPPropAmino *aadata; float *dhstat = NULL; char code1; AjPStr code3 = NULL; ajint idx = 0; ajuint i; ajuint iend; AjPStr propstr = NULL; float charge; char csign; /* ACD File Processing */ embInit("inforesidue", argc, argv); code = ajAcdGetString("code"); mfptr = ajAcdGetDatafile("aadata"); wfptr = ajAcdGetDatafile("mwdata"); outf = ajAcdGetOutfile("outfile"); aadata = embPropEaminoRead(mfptr); mwdata = embPropEmolwtRead(wfptr); if(!embReadAminoDataFloatC(DAYHOFF_FILE,&dhstat,(float)0.001)) ajFatal("Set the EMBOSS_DATA environment variable"); /* Application logic */ ajStrFmtUpper(&code); iend = ajStrGetLen(code); ajFmtPrintF(outf, "%-4s %-5s %-20s %6s %9s %-30s %s\n", "Code", "Short", "Mnemonic", "Charge", "MolWt", "Properties", "Ambiguity"); for(i=0; i<iend; i++) { code1=ajStrGetCharPos(code,i); if(ajResidueExistsChar(code1)) { idx = ajBasecodeToInt(code1); ajResidueToTriplet(code1, &code3); ajStrFmtTitle(&code3); if(!embPropGetProperties(aadata[idx], &propstr)) ajStrAssignC(&propstr, "(none)"); charge = embPropGetCharge(aadata[idx]); if(charge > 0.0) csign = '+'; else if(charge < 0.0) csign = '-'; else csign = ' '; ajFmtPrintF(outf, "%-4c %-5S %-20S %3c%3.1f %9.4f %-30S %S\n", code1, code3, ajResidueGetMnemonic(code1), csign, fabs(charge), embPropMolwtGetMolwt(mwdata[idx]), propstr, ajResidueGetCodes(code1)); } else { ajFmtPrintF(outf, "%-4c %-5s %-20s %6s %9s %-30s %s\n", code1, ".", "invalid", ".", ".", ".", "."); } } /* Memory management and exit */ ajStrDel(&code); ajStrDel(&code3); ajStrDel(&propstr); ajFileClose(&outf); ajFileClose(&mfptr); ajFileClose(&wfptr); ajFileClose(&outf); embPropAminoDel(&aadata); embPropMolwtDel(&mwdata); AJFREE(dhstat); embExit(); return 0; }
/*
 * Count the residue composition of a NUL-terminated sequence into c[].
 *
 * All EMBIEPSIZE entries of c are zeroed, then each character of s is
 * upper-cased, mapped through ajBasecodeToInt() and its counter
 * incremented. Afterwards:
 *   - B counts (index 1) are split between D (index 3) and N (index 13)
 *     in the ratio 5.5/9.8, and Z counts (index 25) between E (index 4)
 *     and Q (index 16) in the ratio 6.0/9.9; the B and Z counters are
 *     then cleared;
 *   - c[EMBIEPAMINO] and c[EMBIEPCARBOXYL] are set from the arguments;
 *   - 2*sscount is removed from the cysteine count and modlysine from
 *     the lysine count; if there are too few residues a warning is
 *     issued and the count is clamped to 0.
 *
 * s          sequence to analyse (read only)
 * amino      value stored in c[EMBIEPAMINO]
 * carboxyl   value stored in c[EMBIEPCARBOXYL]
 * sscount    number of disulphide bridges (two cysteines each)
 * modlysine  number of modified lysines
 * c          output array with at least EMBIEPSIZE elements
 */
void embIepCompC(const char *s, ajint amino, ajint carboxyl,
                 ajint sscount, ajint modlysine, ajint *c)
{
    ajint i;
    ajint j;
    const char *p;

    for(i=0;i<EMBIEPSIZE;++i)
        c[i]=0;

    p=s;

    while(*p)
    {
        ++c[ajBasecodeToInt(ajSysCastItoc(toupper((ajint)*p)))];
        ++p;
    }

    if(c[1])                    /* B = D or N use Dayhoff freq */
    {
        j = (int) (0.5 + ((float)c[1]) * 5.5 / 9.8);
        c[3]  += j;
        c[13] += c[1] - j;
        ajDebug("embIepCompC B:%d => D:%d N:%d\n", c[1], j, c[1]-j);
        c[1] = 0;
    }

    if(c[25])                   /* Z = E or Q use Dayhoff freq */
    {
        j = (int) (0.5 + ((float)c[25]) * 6.0 / 9.9);
        c[4]  += j;
        c[16] += c[25] - j;
        ajDebug("embIepCompC Z:%d => E:%d Q:%d\n", c[25], j, c[25]-j);
        c[25] = 0;
    }

    c[EMBIEPAMINO]    = amino;
    c[EMBIEPCARBOXYL] = carboxyl;

    if (sscount > 0)
    {
        if(c[EMBIEPCYSTEINE] < 2*sscount)
        {
            /* Report the actual cysteine count; nothing has been
            ** subtracted from it at this point */
            ajWarn("embIepCompC %d disulphides but only %d cysteines\n",
                   sscount, c[EMBIEPCYSTEINE]);
            c[EMBIEPCYSTEINE] = 0;
        }
        else
        {
            c[EMBIEPCYSTEINE] -= 2*sscount;
        }
    }

    if (modlysine > 0)
    {
        if(c[EMBIEPLYSINE] < modlysine)
        {
            /* First value is the requested modified-lysine count
            ** (was mistakenly sscount) */
            ajWarn("embIepCompC %d modified lysines but only %d lysines\n",
                   modlysine, c[EMBIEPLYSINE]);
            c[EMBIEPLYSINE] = 0;
        }
        else
        {
            c[EMBIEPLYSINE] -= modlysine;
        }
    }

    return;
}
ajuint embMatProtScanInt(const AjPStr s, const AjPStr n, const EmbPMatPrints m, AjPList *l, AjBool *all, AjBool *ordered, AjBool overlap) { EmbPMatMatch mm; AjPStr t; char *p; char *q; ajint slen; ajint score; ajint mlen; ajint elem; ajint minpc; ajint maxscore; ajint limit; ajint sum; ajint hpe; ajint hpm; ajint lastelem; ajint lastpos; ajint op; ajint i; ajint j; t = ajStrNewC(ajStrGetPtr(s)); ajStrFmtUpper(&t); p = q = ajStrGetuniquePtr(&t); slen = ajStrGetLen(t); for(i=0;i<slen;++i,++p) *p = ajSysCastItoc(ajBasecodeToInt((ajint)*p)); p = q; *all = *ordered = ajTrue; lastelem = lastpos = INT_MAX; hpm=0; for(elem=(m)->n - 1;elem >= 0;--elem) { hpe = 0; mlen = (m)->len[elem]; minpc = (m)->thresh[elem]; maxscore = (m)->max[elem]; limit = slen-mlen; for(i=0;i<limit;++i) { sum = 0; for(j=0;j<mlen;++j) sum += (m)->matrix[elem][(ajint) p[i+j]][j]; score = (sum*100)/maxscore; if(score>=minpc) { if(elem<lastelem && *ordered) { if(lastelem == INT_MAX) { lastelem = elem; lastpos = i; } else { lastelem = elem; op = i; if(!overlap) op += mlen; if(op >= lastpos) *ordered = ajFalse; lastpos = i; } } ++hpe; ++hpm; matPushHitInt(n,m,l,i,score,elem,hpe,hpm); } } if(!hpe) *all = ajFalse; } if(hpm) { ajListPop(*l,(void **)&mm); if(*all) { mm->all = ajTrue; if(*ordered) mm->ordered = ajTrue; else mm->ordered = ajFalse; } else { mm->all = ajFalse; if(*ordered) mm->ordered = ajTrue; else mm->ordered = ajFalse; } ajListPush(*l,(void *)mm); } ajStrDel(&t); return hpm; }
/*
 * Read an amino acid property file into a newly allocated table.
 *
 * A table of EMBPROPSIZE zero-initialised records is allocated. Lines
 * are whitespace-normalised; empty lines and lines starting with '#' or
 * '!' are ignored. The first remaining line must begin with "aa" (it is
 * treated as a header and otherwise skipped). Every later line must
 * start with a single-character token, which is upper-cased and mapped
 * through ajBasecodeToInt() to choose the record, followed by ten
 * values: tiny, sm_all, aliphatic, aromatic, nonpolar, polar (ints),
 * charge (float), pve, nve, extcoeff (ints).
 *
 * mfptr  open input file
 *
 * Returns the table. Calls ajFatal() on a bad header, a first token that
 * is not one character, a code that maps to 27, or a line from which
 * fewer than ten values could be read.
 */
EmbPPropAmino* embPropEaminoRead(AjPFile mfptr)
{
    AjPStr buf  = NULL;
    AjPStr word = NULL;
    AjBool need_header;
    const char *text;
    ajuint slot;
    ajint nread;
    EmbPPropAmino entry;
    EmbPPropAmino *table;

    buf  = ajStrNew();
    word = ajStrNew();
    need_header = ajTrue;

    AJCNEW0(table,EMBPROPSIZE);

    for(slot=0; slot < EMBPROPSIZE; ++slot)
        AJNEW0(table[slot]);

    while(ajReadline(mfptr, &buf))
    {
        ajStrRemoveWhiteExcess(&buf);
        text = ajStrGetPtr(buf);

        /* blank line or comment */
        if(!*text || *text=='#' || *text=='!')
            continue;

        if(need_header)
        {
            if(!ajStrPrefixC(buf,"aa"))
                ajFatal("Incorrect (old?) format amino data file");

            need_header = ajFalse;
            continue;
        }

        ajFmtScanS(buf,"%S",&word);
        ajStrFmtUpper(&word);

        if(ajStrGetLen(word) != 1)
            ajFatal("Amino file line doesn't begin with a single character");

        slot = ajBasecodeToInt((ajint) *ajStrGetPtr(word));

        if(slot == 27)
            ajFatal("Amino file line doesn't begin with a single A->Z (%S)",
                    buf);

        entry = table[slot];

        /* first column (the residue code) is skipped by %*s */
        nread = ajFmtScanS(buf,
                           "%*s%d%d%d%d%d%d%f%d%d%d",
                           &entry->tiny,
                           &entry->sm_all,
                           &entry->aliphatic,
                           &entry->aromatic,
                           &entry->nonpolar,
                           &entry->polar,
                           &entry->charge,
                           &entry->pve,
                           &entry->nve,
                           &entry->extcoeff);

        if(nread != 10)
            ajFatal("Only %d columns in amino file - expected %d",nread+1,11);
    }

    ajStrDel(&buf);
    ajStrDel(&word);

    return table;
}
const char* embPropCharToThree(char c) { return embPropIntToThree(ajBasecodeToInt(c)); }
/*
 * Read a molecular weight file into a newly allocated table.
 *
 * A table of EMBPROPSIZE+2 zero-initialised records is allocated. Lines
 * are whitespace-normalised; empty lines and lines starting with '#' or
 * '!' are ignored. The first remaining line must begin with "Mol" (it is
 * treated as a header and otherwise skipped). Each later line is one of:
 *   - a token starting with HYDROGEN, OXYGEN or WATER (case-insensitive,
 *     since the token is upper-cased) followed by average and mono
 *     weights, stored at EMBPROPHINDEX, EMBPROPOINDEX or EMBPROPWINDEX;
 *   - a single-character residue code followed by average and mono
 *     weights, stored at the ajBasecodeToInt() index of that code.
 *
 * mfptr  open input file
 *
 * Returns the table. Calls ajFatal() on a bad header, an unknown
 * multi-character token, a code that maps to 27, or a line from which
 * two weights could not be read.
 */
EmbPPropMolwt* embPropEmolwtRead(AjPFile mfptr)
{
    AjPStr line  = NULL;
    AjPStr token = NULL;
    AjBool firstline;
    const char *p;
    ajuint i;
    ajint n;
    EmbPPropMolwt *ret;

    line  = ajStrNew();
    token = ajStrNew();
    firstline = ajTrue;

    AJCNEW0(ret,EMBPROPSIZE+2);

    for(i=0; i < EMBPROPSIZE+2; ++i)
        AJNEW0(ret[i]);

    while(ajReadline(mfptr, &line))
    {
        ajStrRemoveWhiteExcess(&line);
        p = ajStrGetPtr(line);

        if(*p=='#' || *p=='!' || !*p)
            continue;

        if(firstline)
        {
            if(!ajStrPrefixC(line,"Mol"))
                ajFatal("Incorrect format molwt file: '%S'", line);

            firstline = ajFalse;
            continue;
        }

        ajFmtScanS(line,"%S",&token);
        ajStrFmtUpper(&token);

        /* Multi-character tokens name the non-residue entries */
        if(ajStrGetLen(token) != 1)
        {
            if(ajStrPrefixC(token,"HYDROGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPHINDEX]->average,
                              &ret[EMBPROPHINDEX]->mono) != 2)
                    ajFatal("Bad format hydrogen data line");
            }
            else if(ajStrPrefixC(token,"OXYGEN"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPOINDEX]->average,
                              &ret[EMBPROPOINDEX]->mono) != 2)
                    ajFatal("Bad format oxygen data line");
            }
            else if(ajStrPrefixC(token,"WATER"))
            {
                if(ajFmtScanS(line,"%*s%lf%lf",
                              &ret[EMBPROPWINDEX]->average,
                              &ret[EMBPROPWINDEX]->mono) != 2)
                    ajFatal("Bad format water data line");
            }
            else
                ajFatal("Unknown molwt token %S",token);

            continue;
        }

        i = ajBasecodeToInt((ajint) *ajStrGetPtr(token));

        if(i == 27)
            ajFatal("Molwt file line doesn't begin with a single A->Z (%S)",
                    line);

        n = ajFmtScanS(line,"%*s%lf%lf",
                       &ret[i]->average,
                       &ret[i]->mono);

        /* n excludes the skipped code column, so report n+1 of 3, and
        ** name the right file type in the message */
        if(n != 2)
            ajFatal("Only %d columns in molwt file - expected %d",n+1,3);
    }

    ajStrDel(&line);
    ajStrDel(&token);

    return ret;
}