/* @funcstatic seqwords_keysearch ******************************************** ** ** Search swissprot with terms structure and writes a hitlist structure ** ** @param [r] inf [AjPFile] File pointer to swissprot database ** @param [r] terms [AjPTerms] Terms object pointer ** @param [w] hits [EmbPHitlist*] Hitlist object pointer ** ** @return [AjBool] True on success ** @@ ******************************************************************************/ static AjBool seqwords_keysearch(AjPFile inf, AjPTerms terms, EmbPHitlist *hits) { AjPStr line =NULL; /* Line of text. */ AjPStr id =NULL; /* Line of text. */ AjPStr temp =NULL; ajint s =0; /* Temp. start of hit value. */ ajint e =0; /* Temp. end of hit value. */ AjPInt start =NULL; /* Array of start of hit(s). */ AjPInt end =NULL; /* Array of end of hit(s). */ ajint nhits =0; /* Number of hits. */ ajint x =0; AjBool foundkw =ajFalse; AjBool foundft =ajFalse; /* Check for valid args. */ if(!inf) return ajFalse; /* Allocate strings and arrays. */ line = ajStrNew(); id = ajStrNew(); start = ajIntNew(); end = ajIntNew(); /* Start of main loop. */ while((ajReadlineTrim(inf,&line))) { /* Parse the AC line. */ if(ajStrPrefixC(line,"AC")) { /* Copy accesion number and remove the ';' from the end. */ ajFmtScanS(line, "%*s %S", &id); ajStrExchangeCC(&id, ";", "\0"); /* Reset flags & no. hits. */ foundkw=ajFalse; foundft=ajFalse; nhits=0; } /* Search the description and keyword lines with search terms. */ else if((ajStrPrefixC(line,"DE") || (ajStrPrefixC(line,"KW")))) { /* ** Search terms have a leading and trailing space to prevent ** them being found as substrings within other words. To ** catch cases where a DE or KW line begins with a search ** term, we must add a leading and trailing space to line. ** We must first remove punctation from the line to be parsed. */ ajStrExchangeSetCC(&line, ".,;:", " "); ajStrAppendK(&line, ' '); ajStrInsertC(&line, 0, " "); for (x = 0; x < terms->N; x++) /* Search term is found. */ if((ajStrFindCaseS(line, terms->Keywords[x])!=-1)) { foundkw=ajTrue; break; } } /* Search the feature table line with search terms. */ else if((ajStrPrefixC(line,"FT DOMAIN"))) { /* ** Search terms have a leading and trailing space to prevent ** them being found as substrings within other words. To ** catch cases where a FT line ends with a search ** term, we must add a trailing space to line ** We must first remove punctation from the line to be parsed. */ ajStrExchangeSetCC(&line, ".,;:", " "); ajStrAppendK(&line, ' '); for (x = 0; x < terms->N; x++) if((ajStrFindCaseS(line, terms->Keywords[x])!=-1)) { /* Search term is found. */ foundft = ajTrue; nhits++; /* Assign start and end of hit. */ ajFmtScanS(line, "%*s %*s %d %d", &s, &e); ajIntPut(&start, nhits-1, s); ajIntPut(&end, nhits-1, e); break; } } /* Parse the sequence. */ else if((ajStrPrefixC(line,"SQ") && ((foundkw == ajTrue) || (foundft == ajTrue)))) { /* Allocate memory for temp. sequence. */ temp = ajStrNew(); /* Read the sequence into hitlist structure. */ while((ajReadlineTrim(inf,&line)) && !ajStrPrefixC(line,"//")) /* Read sequence line into temp. */ ajStrAppendC(&temp,ajStrGetPtr(line)+3); /* Clean up temp. sequence. */ ajStrRemoveWhite(&temp); /*Priority is given to domain (rather than full length) sequence.*/ if(foundft) { for(x=0;x<nhits;x++) { /* Increment counter of hits for subsequent hits*/ (*hits)->N++; /* Reallocate memory for array of hits in hitlist structure. */ AJCRESIZE((*hits)->hits, (*hits)->N); (*hits)->hits[(*hits)->N-1]=embHitNew(); ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model, "KEYWORD"); /* Assign start and end of hit. */ (*hits)->hits[(*hits)->N-1]->Start = ajIntGet(start, x); (*hits)->hits[(*hits)->N-1]->End = ajIntGet(end, x); /* Extract sequence within specified range */ ajStrAssignSubS(&(*hits)->hits[(*hits)->N - 1]->Seq, temp, (*hits)->hits[(*hits)->N - 1]->Start - 1, (*hits)->hits[(*hits)->N - 1]->End - 1); /* Put id into structure */ ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id); } } else { /* Increment counter of hits */ (*hits)->N++; /* Reallocate memory for array of hits in hitlist structure */ AJCRESIZE((*hits)->hits, (*hits)->N); (*hits)->hits[(*hits)->N-1]=embHitNew(); ajStrAssignC(&(*hits)->hits[(*hits)->N-1]->Model, "KEYWORD"); /* Extract whole sequence */ ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Seq, temp); (*hits)->hits[(*hits)->N - 1]->Start = 1; (*hits)->hits[(*hits)->N - 1]->End = ajStrGetLen((*hits)->hits[(*hits)->N - 1]->Seq); /* Put id into structure */ ajStrAssignRef(&(*hits)->hits[(*hits)->N - 1]->Acc, id); } /* Free temp. sequence */ ajStrDel(&temp); } } /* Clean up */ ajStrDel(&line); ajStrDel(&id); ajIntDel(&start); ajIntDel(&end); return ajTrue; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPFile outf = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr bases = NULL; ajint begin; ajint end; ajint len; ajint minlen; float minobsexp; float minpc; ajint window; ajint shift; ajint plotstart; ajint plotend; float *xypc = NULL; float *obsexp = NULL; AjBool *thresh = NULL; float obsexpmax; ajint i; ajint maxarr; embInit("newcpgreport",argc,argv); seqall = ajAcdGetSeqall("sequence"); window = ajAcdGetInt("window"); shift = ajAcdGetInt("shift"); outf = ajAcdGetOutfile("outfile"); minobsexp = ajAcdGetFloat("minoe"); minlen = ajAcdGetInt("minlen"); minpc = ajAcdGetFloat("minpc"); substr = ajStrNew(); bases = ajStrNewC("CG"); maxarr = 0; while(ajSeqallNext(seqall, &seq)) { begin = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); strand = ajSeqGetSeqCopyS(seq); ajStrFmtUpper(&strand); ajStrAssignSubC(&substr,ajStrGetPtr(strand),--begin,--end); len=ajStrGetLen(substr); if(len > maxarr) { AJCRESIZE(obsexp, len); AJCRESIZE(thresh, len); AJCRESIZE(xypc, len); maxarr = len; } for(i=0;i<len;++i) obsexp[i]=xypc[i]=0.0; newcpgreport_findbases(substr, len, window, shift, obsexp, xypc, bases, &obsexpmax, &plotstart, &plotend); newcpgreport_identify(outf, obsexp, xypc, thresh, 0, len, shift, ajStrGetPtr(bases), ajSeqGetNameC(seq), minlen, minobsexp, minpc, ajStrGetPtr(strand)); ajStrDel(&strand); } ajStrDel(&bases); ajSeqDel(&seq); ajStrDel(&substr); ajFileClose(&outf); AJFREE(obsexp); AJFREE(thresh); AJFREE(xypc); ajSeqallDel(&seqall); embExit(); return 0; }
int main(int argc, char **argv) { AjPAlign align; AjPSeq a; AjPSeq b; AjPSeqout seqout; AjPStr m; AjPStr n; AjPStr merged = NULL; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; float *path; ajint *compass; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; ajulong maxarr = 1000; ajulong len; /* arbitrary. realloc'd if needed */ size_t stlen; float score; ajint begina; ajint beginb; embInit("merger", argc, argv); a = ajAcdGetSeq("asequence"); b = ajAcdGetSeq("bsequence"); seqout = ajAcdGetSeqout("outseq"); matrix = ajAcdGetMatrixf("datafile"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW(path, maxarr); AJCNEW(compass, maxarr); /* ** make the two sequences lowercase so we can show which one we are ** using in the merge by uppercasing it */ ajSeqFmtLower(a); ajSeqFmtLower(b); m = ajStrNew(); n = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); begina = ajSeqGetBegin(a); beginb = ajSeqGetBegin(b); lena = ajSeqGetLen(a); lenb = ajSeqGetLen(b); if(lenb > (ULONG_MAX/(ajulong)(lena+1))) ajFatal("Sequences too big. Try 'supermatcher'"); len = lena*lenb; if(len>maxarr) { ajDebug("merger: resize path, len to %d (%d * $d)\n", len, lena, lenb); stlen = (size_t) len; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&m,""); ajStrAssignC(&n,""); score = embAlignPathCalc(p,q,lena,lenb,gapopen,gapextend,path,sub,cvt, compass, ajFalse); /*score = embAlignScoreNWMatrix(path,compass,gapopen,gapextend, a,b,lena,lenb,sub,cvt, &start1,&start2);*/ embAlignWalkNWMatrix(path,a,b,&m,&n,lena,lenb, &start1,&start2,gapopen, gapextend,compass); /* ** now construct the merged sequence, uppercase the bits of the two ** input sequences which are used in the merger */ merger_Merge(align, &merged,p,q,m,n,start1,start2, ajSeqGetNameC(a),ajSeqGetNameC(b)); embAlignReportGlobal(align, a, b, m, n, start1, start2, gapopen, gapextend, score, matrix, begina, beginb); ajAlignWrite(align); ajAlignReset(align); /* write the merged sequence */ ajSeqAssignSeqS(a, merged); ajSeqoutWriteSeq(seqout, a); ajSeqoutClose(seqout); ajSeqoutDel(&seqout); ajSeqDel(&a); ajSeqDel(&b); ajAlignClose(align); ajAlignDel(&align); ajStrDel(&merged); AJFREE(compass); AJFREE(path); ajStrDel(&n); ajStrDel(&m); embExit(); return 0; }
static AjBool ssematch_NWScore(AjPScop temp_scop, AjPSeq pseq, ajint mode, AjPMatrixf matrix, float gapopen, float gapextend) { ajint start1 =0; /* Start of seq 1, passed as arg but not used.*/ ajint start2 =0; /* Start of seq 2, passed as arg but not used.*/ ajint maxarr =300; /* Initial size for matrix. */ ajint len; ajint *compass; const char *p; /* Query sequence. */ const char *q; /* Subject sequence from scop object. */ float **sub; float id =0.; /* Passed as arg but not used here. */ float sim =0.; float idx =0.; /* Passed as arg but not used here. */ float simx =0.; /* Passed as arg but not used here. */ float *path; AjPStr pstr = NULL; /* m walk alignment for first sequence Passed as arg but not used here. */ AjPStr qstr = NULL; /* n walk alignment for second sequence Passed as arg but not used here. */ AjPSeq qseq = NULL; /* Subject sequence. */ ajint lenp; /* Length of query sequence. */ ajint lenq; /* Length of subject sequence. */ AjPSeqCvt cvt = 0; AjBool show = ajFalse; /*Passed as arg but not used here. */ AJCNEW(path, maxarr); AJCNEW(compass, maxarr); pstr = ajStrNew(); qstr = ajStrNew(); gapopen = ajRoundFloat(gapopen,8); gapextend = ajRoundFloat(gapextend,8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); /* Extract subject sequence from scop object, convert to 3 letter code. */ if (mode == 0) qseq = ssematch_convertbases(temp_scop->Sse); else if (mode == 1) qseq = ssematch_convertbases(temp_scop->Sss); lenp = ajSeqGetLen(pseq); /* Length of query sequence. */ lenq = ajSeqGetLen(qseq); /* Length of subject sequence. */ /* Start of main application loop */ /* Intitialise variables for use by alignment functions*/ len = (lenp * lenq); if(len>maxarr) { AJCRESIZE(path,len); AJCRESIZE(compass,len); maxarr=len; } p = ajSeqGetSeqC(pseq); q = ajSeqGetSeqC(qseq); ajStrAssignC(&pstr,""); ajStrAssignC(&qstr,""); /* Check that no sequence length is 0. */ if((lenp == 0)||(lenq == 0)) { AJFREE(compass); AJFREE(path); ajStrDel(&pstr); ajStrDel(&qstr); } /* Call alignment functions. */ embAlignPathCalc(p,q,lenp,lenq, gapopen, gapextend,path,sub,cvt,compass,show); /*embAlignScoreNWMatrix(path,compass,gapopen,gapextend, pseq, qseq, lenp,lenq,sub,cvt, &start1,&start2);*/ embAlignWalkNWMatrix(path,pseq,qseq,&pstr,&qstr, lenp,lenq,&start1,&start2, gapopen,gapextend,compass); embAlignCalcSimilarity(pstr,qstr,sub,cvt,lenp, lenq,&id,&sim,&idx, &simx); /* Assign score. */ temp_scop->Score = sim; /* Tidy up */ AJFREE(compass); AJFREE(path); ajStrDel(&pstr); ajStrDel(&qstr); ajSeqDel(&qseq); /* Bye Bye */ return ajTrue; }
/* @funcstatic newcoils_read_matrix ******************************************* ** ** Reads the matrix and stores in a hept_pref structure ** ** @param [u] inf [AjPFile] matrix input file ** @return [struct hept_pref*] Matrix data for heptad preference ******************************************************************************/ static struct hept_pref* newcoils_read_matrix(AjPFile inf) { ajint i; ajint j; ajint pt; ajint aa_len; ajint win; float m_g; float sd_g; float m_cc; float sd_cc; float sc; float hept[NCHEPTAD]; AjPStr buff; const char *pbuff; struct hept_pref *h; buff = ajStrNew(); aa_len = strlen(NCAAs); AJNEW(h); AJCNEW(h->m,aa_len); for(i=0; i<aa_len; ++i) { AJCNEW(h->m[i],NCHEPTAD); for(j=0; j<NCHEPTAD; ++j) h->m[i][j] = -1; } AJNEW(h->f); h->n = 0; h->smallest = 1.0; while(ajReadlineTrim(inf,&buff)) { pbuff = ajStrGetPtr(buff); if(*pbuff != '%') { if((strncmp(pbuff,"uw ",3)==0) || (strncmp(pbuff,"w ",2)==0)) { i = h->n; if(strncmp(pbuff,"uw ",3)==0) h->f[i].w = 0; else h->f[i].w = 1; ajFmtScanS(buff,"%*s %d %f %f %f %f %f",&win,&m_cc, &sd_cc,&m_g,&sd_g,&sc); h->f[i].win = win; h->f[i].m_cc = (float)m_cc; h->f[i].sd_cc = (float)sd_cc; h->f[i].m_g = (float)m_g; h->f[i].sd_g = (float)sd_g; h->f[i].sc = (float)sc; h->n++; AJCRESIZE(h->f,(h->n)+1); if((h->n)>=9) ajFatal("Too many window parms in matrix file\n"); } else if(*pbuff>='A' && *pbuff<='Z') { /* AA data */ pt = (int)(pbuff[0]-'A'); if(h->m[pt][0]==-1) { ajFmtScanS(buff,"%*s%f%f%f%f%f%f%f",&hept[0], &hept[1],&hept[2],&hept[3],&hept[4], &hept[5],&hept[6]); for(i=0; i<NCHEPTAD; ++i) { h->m[pt][i] = (float)hept[i]; if(h->m[pt][i]>0) { if(h->m[pt][i]<h->smallest) h->smallest = h->m[pt][i]; } else h->m[pt][i]=-1; /* Don't permit zero values */ } } else ajWarn("multiple entries for AA %c in matrix file\n", *pbuff); } else { ajWarn("strange characters in matrix file\n"); ajWarn("Ignoring line: %S\n",buff); } } } ajStrDel(&buff); return h; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPFile inf = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr name = NULL; AjPStr mname = NULL; AjPStr tname = NULL; AjPStr pname = NULL; AjPStr line = NULL; AjPStr cons = NULL; AjPStr m = NULL; AjPStr n = NULL; AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */ ajint type; ajint begin; ajint end; ajulong len; ajint i; ajint j; float **fmatrix=NULL; ajint mlen; float maxfs; ajint thresh; float gapopen; float gapextend; float opencoeff; float extendcoeff; const char *p; ajulong maxarr = 1000; ajulong alen; float *path; ajint *compass; size_t stlen; embInit("prophet", argc, argv); seqall = ajAcdGetSeqall("sequence"); inf = ajAcdGetInfile("infile"); opencoeff = ajAcdGetFloat("gapopen"); extendcoeff = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); /*JISON replacing outfile */ opencoeff = ajRoundFloat(opencoeff, 8); extendcoeff = ajRoundFloat(extendcoeff, 8); substr = ajStrNew(); name = ajStrNew(); mname = ajStrNew(); tname = ajStrNew(); line = ajStrNew(); m = ajStrNewC(""); n = ajStrNewC(""); type = prophet_getType(inf,&tname); if(!type) ajFatal("Unrecognised profile/matrix file format"); prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh, &maxfs, &cons); ajAlignSetMatrixName(align, mname); AJCNEW(fmatrix, mlen); for(i=0;i<mlen;++i) { AJCNEW(fmatrix[i], AZ); if(!ajReadlineTrim(inf,&line)) ajFatal("Missing matrix line"); p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t"); for(j=0;j<AZ;++j) { sscanf(p,"%f",&fmatrix[i][j]); p = ajSysFuncStrtok(NULL," \t"); } } AJCNEW(path, maxarr); AJCNEW(compass, maxarr); while(ajSeqallNext(seqall, &seq)) { begin = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignC(&name,ajSeqGetNameC(seq)); strand = ajSeqGetSeqCopyS(seq); ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1); len = ajStrGetLen(substr); if(len > (ULONG_MAX/(ajulong)(mlen+1))) ajFatal("Sequences too big. Try 'supermatcher'"); alen = len*mlen; if(alen>maxarr) { stlen = (size_t) alen; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=alen; } ajStrAssignC(&m,""); ajStrAssignC(&n,""); /* JISON used to be prophet_scan_profile(substr,pname,name,mlen,fmatrix, outf,cons,opencoeff, extendcoeff,path,compass,&m,&n,len); */ /* JISON new call and reset align */ prophet_scan_profile(substr,name,pname,mlen,fmatrix, align,cons,opencoeff, extendcoeff,path,compass,&m,&n,(ajint)len); ajAlignReset(align); ajStrDel(&strand); } for(i=0;i<mlen;++i) AJFREE (fmatrix[i]); AJFREE (fmatrix); AJFREE(path); AJFREE(compass); ajStrDel(&line); ajStrDel(&cons); ajStrDel(&name); ajStrDel(&pname); ajStrDel(&mname); ajStrDel(&tname); ajStrDel(&substr); ajStrDel(&m); ajStrDel(&n); ajSeqDel(&seq); ajFileClose(&inf); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); embExit(); return 0; }