static void tfscan_print_hits(AjPList *l, ajint hits, AjPReport outf, const AjPTable t, const AjPSeq seq, ajuint minlength, const AjPTable btable) { ajint i; EmbPMatMatch m; const AjPStr acc = NULL; const AjPStr bf = NULL; AjPFeattable ftable = NULL; AjPFeature gf = NULL; AjPStr type = ajStrNewC("SO:0000235"); AjPStr tmpstr = NULL; ftable = ajFeattableNewSeq(seq); /*ajFmtPrintF(outf,"TFSCAN of %s from %d to %d\n\n",ajStrGetPtr(name), begin,end);*/ for(i=0;i<hits;++i) { ajListPop(*l,(void **)&m); acc = ajTableFetchS(t, m->seqname); if(m->len >= minlength) { /*ajFmtPrintF(outf,"%-20s %-8s %-5d %-5d %s\n", ajStrGetPtr(m->seqname), ajStrGetPtr(acc),m->start, m->start+m->len-1,ajStrGetPtr(s));*/ if(m->forward) gf = ajFeatNew(ftable, ajUtilGetProgram(), type, m->start, m->start+m->len-1, (float) m->score, '+',0); else gf = ajFeatNew(ftable, ajUtilGetProgram(), type, m->start, m->start+m->len-1, (float) m->score, '-',0); ajFmtPrintS(&tmpstr, "*acc %S", acc); ajFeatTagAddSS(gf, NULL, tmpstr); bf = ajTableFetchS(btable, m->seqname); ajFmtPrintS(&tmpstr, "*factor %S", bf); ajFeatTagAddSS(gf, NULL, tmpstr); } embMatMatchDel(&m); } ajReportWrite(outf, ftable, seq); ajFeattableDel(&ftable); ajStrDel(&tmpstr); ajStrDel(&type); return; }
int main(int argc, char **argv) { AjPFile inf = NULL; AjPFile inf2 = NULL; AjPFeattable tab = NULL; AjPReport report = NULL; AjPSeq sequence = NULL; AjPStr redatanew = NULL; AjPStr str = NULL; AjPStr regexp = NULL; AjPStr temp = NULL; AjPStr text = NULL; AjPStr docdata = NULL; AjPStr data = NULL; AjPStr accession = NULL; AjPStr name = NULL; EmbPPatMatch match = NULL; AjPStr savereg = NULL; AjPStr fthit = NULL; AjBool full; AjBool prune; ajint i; ajint number; ajint start; ajint end; ajint length; ajint zstart; ajint zend; const char *p; ajint seqlength; AjPStr tmpstr = NULL; AjPStr tailstr = NULL; AjPFeature gf; embInit("patmatmotifs", argc, argv); ajStrAssignC(&fthit, "SO:0001067"); savereg = ajStrNew(); str = ajStrNew(); regexp = ajStrNew(); temp = ajStrNew(); data = ajStrNew(); accession = ajStrNew(); text = ajStrNew(); name = ajStrNew(); sequence = ajAcdGetSeq("sequence"); report = ajAcdGetReport("outfile"); full = ajAcdGetBoolean("full"); prune = ajAcdGetBoolean("prune"); ajSeqFmtUpper(sequence); /* prosite regexs are all upper case */ tab = ajFeattableNewSeq(sequence); ajStrAssignC(&tailstr, ""); seqlength = ajStrGetLen(str); str = ajSeqGetSeqCopyS(sequence); redatanew = ajStrNewC("PROSITE/prosite.lines"); docdata = ajStrNewC("PROSITE/"); inf = ajDatafileNewInNameS(redatanew); if(!inf) ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running"); ajFmtPrintAppS(&tmpstr, "Full: %B\n", full); ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune); ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf); ajReportSetHeaderS(report, tmpstr); while(ajReadlineTrim(inf, ®exp)) { p=ajStrGetPtr(regexp); if(*p && *p!=' ' && *p!='^') { p=ajSysFuncStrtok(p," "); ajStrAssignC(&name,p); if(prune) if(ajStrMatchCaseC(name,"myristyl") || ajStrMatchCaseC(name,"asn_glycosylation") || ajStrMatchCaseC(name,"camp_phospho_site") || ajStrMatchCaseC(name,"pkc_phospho_site") || ajStrMatchCaseC(name,"ck2_phospho_site") || ajStrMatchCaseC(name,"tyr_phospho_site")) { for(i=0;i<4;++i) ajReadlineTrim(inf, ®exp); continue; } p=ajSysFuncStrtok(NULL," "); ajStrAssignC(&accession,p); } if(ajStrPrefixC(regexp, "^")) { p = ajStrGetPtr(regexp); ajStrAssignC(&temp,p+1); ajStrAssignC(&savereg,p+1); match = embPatMatchFind(temp, str, ajFalse, ajFalse); number = embPatMatchGetNumber(match); for(i=0; i<number; i++) { seqlength = ajStrGetLen(str); start = 1+embPatMatchGetStart(match, i); end = 1+embPatMatchGetEnd(match, i); length = embPatMatchGetLen(match, i); gf = ajFeatNew(tab, NULL, fthit, start, end, (float) length, ' ', 0); ajFmtPrintS(&tmpstr, "*motif %S", name); ajFeatTagAddSS(gf, NULL, tmpstr); if(start-5<0) zstart = 0; else zstart = start-5; if(end+5> seqlength) zend = end; else zend = end+5; ajStrAssignSubS(&temp, str, zstart, zend); } if(full && number) { ajStrAssignC(&redatanew,ajStrGetPtr(docdata)); ajStrAppendC(&redatanew,ajStrGetPtr(accession)); inf2 = ajDatafileNewInNameS(redatanew); if(!inf2) continue; /* ** Insert Prosite documentation from files made by ** prosextract.c */ ajFmtPrintAppS(&tailstr, "Motif: %S\n", name); ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number); while(ajReadlineTrim(inf2, &text)) ajFmtPrintAppS(&tailstr, "%S\n", text); ajFmtPrintAppS(&tailstr, "\n***************\n\n"); ajFileClose(&inf2); } embPatMatchDel(&match); } } ajReportSetTailS(report,tailstr); ajReportWrite(report, tab, sequence); ajReportDel(&report); ajFeattableDel(&tab); ajStrDel(&temp); ajStrDel(®exp); ajStrDel(&savereg); ajStrDel(&str); ajStrDel(&data); ajStrDel(&docdata); ajStrDel(&text); ajStrDel(&redatanew); ajStrDel(&accession); ajSeqDel(&sequence); ajStrDel(&tailstr); ajStrDel(&fthit); ajStrDel(&name); ajStrDel(&tmpstr); ajFeattableDel(&tab); ajFileClose(&inf); embExit(); return 0; }
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq, const AjPPatternRegex pat, AjBool reverse) { ajint pos=0; ajint off; ajint len; AjPFeature sf = NULL; AjPStr substr = NULL; AjPStr seqstr = NULL; AjPStr tmpstr = NULL; AjPStr tmp = ajStrNew(); AjPRegexp patexp = ajPatternRegexGetCompiled(pat); ajint adj; AjBool isreversed; AjPSeq revseq; ajint seqlen; seqlen = ajSeqGetLen(seq); if(!seqlen) return; isreversed = ajSeqIsReversedTrue(seq); if(isreversed) seqlen += ajSeqGetOffset(seq); pos = ajSeqGetBeginTrue(seq); adj = ajSeqGetEndTrue(seq); if(!ajStrGetLen(featMotifProt)) ajStrAssignC(&featMotifProt, "SO:0001067"); if(!ajStrGetLen(featMotifNuc)) ajStrAssignC(&featMotifNuc, "SO:0000714"); /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n", pos, adj, reverse, isreversed);*/ /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n", seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq), ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/ if (reverse) { revseq = ajSeqNewSeq(seq); ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1); ajSeqstrReverse(&seqstr); } ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1); ajStrFmtUpper(&seqstr); while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr)) { off = ajRegOffset(patexp); len = ajRegLenI(patexp, 0); if(off || len) { ajRegSubI(patexp, 0, &substr); ajRegPost(patexp, &tmp); ajStrAssignS(&seqstr, substr); ajStrAppendS(&seqstr, tmp); pos += off; /*ajDebug("match pos: %d adj: %d len: %d off:%d\n", pos, adj, len, off);*/ if (reverse) sf = ajFeatNew(ftable, NULL, featMotifNuc, adj - pos - len + 2, adj - pos + 1, 0.0, '-', 0); else { if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable)) sf = ajFeatNewProt(ftable, NULL, featMotifProt, pos, pos + len - 1, 0.0); else sf = ajFeatNew(ftable, NULL, featMotifNuc, pos, pos + len - 1, 0.0, '.', 0); } if(isreversed) ajFeatReverse(sf, seqlen); ajFmtPrintS (&tmpstr,"*pat %S: %S", ajPatternRegexGetName(pat), ajPatternRegexGetPattern(pat)); ajFeatTagAdd (sf,NULL,tmpstr); pos += 1; ajStrCutStart(&seqstr, 1); } else { pos++; ajStrCutStart(&seqstr, 1); } } ajStrDel(&tmpstr); ajStrDel(&tmp); ajStrDel(&substr); ajStrDel(&seqstr); if(reverse) ajSeqDel(&revseq); return; }
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq, const AjPPatternSeq pat, AjBool reverse) { const void *tidy; ajuint hits; ajuint i; AjPPatComp pattern; EmbPMatMatch m = NULL; AjPFeature sf = NULL; AjPSeq revseq = NULL; AjPList list = ajListNew(); AjPStr seqstr = ajStrNew(); AjPStr seqname = ajStrNew(); AjPStr tmp = ajStrNew(); ajint adj; ajint begin; AjBool isreversed; ajint seqlen; seqlen = ajSeqGetLen(seq); if(!seqlen) return; isreversed = ajSeqIsReversedTrue(seq); if(isreversed) seqlen += ajSeqGetOffset(seq); begin = ajSeqGetBeginTrue(seq); adj = ajSeqGetEndTrue(seq); if(!ajStrGetLen(featMotifProt)) ajStrAssignC(&featMotifProt, "SO:0001067"); if(!ajStrGetLen(featMotifNuc)) ajStrAssignC(&featMotifNuc, "SO:0000714"); ajStrAssignS(&seqname,ajSeqGetNameS(seq)); pattern = ajPatternSeqGetCompiled(pat); if (reverse) { revseq = ajSeqNewSeq(seq); ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), begin-1,adj-1); ajSeqstrReverse(&seqstr); } else ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), begin-1,adj-1); ajStrFmtUpper(&seqstr); /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n" "'%S'\n", seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq), ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq), seqstr);*/ ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n", pattern->pattern, pat->Protein, reverse); embPatFuzzSearchII(pattern,begin,seqname,seqstr,list, ajPatternSeqGetMismatch(pat),&hits,&tidy); ajDebug ("embPatternSeqSearch: found %d hits\n",hits); if(!reverse) ajListReverse(list); for(i=0;i<hits;++i) { ajListPop(list,(void **)&m); if (reverse) sf = ajFeatNew(ftable, NULL, featMotifNuc, adj - m->start - m->len + begin + 1, adj - m->start + begin, 0.0, '-', 0); else { if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable)) sf = ajFeatNewProt(ftable, NULL, featMotifProt, m->start, m->start + m->len - 1, 0.0); else sf = ajFeatNew(ftable, NULL, featMotifNuc, m->start, m->start + m->len - 1, 0.0, '.', 0); } if(isreversed) ajFeatReverse(sf, seqlen); /* ajUser("isrev: %B reverse: %B begin: %d adj: %d " "start: %d len: %d seqlen: %d %d..%d '%c'\n", isreversed, reverse, begin, adj, m->start, m->len, seqlen, sf->Start, sf->End, sf->Strand); */ ajFeatSetScore(sf, (float) (m->len - m->mm)); ajFmtPrintS(&tmp, "*pat %S: %S", ajPatternSeqGetName(pat), ajPatternSeqGetPattern(pat)); ajFeatTagAdd(sf,NULL,tmp); if(m->mm) { ajFmtPrintS(&tmp, "*mismatch %d", m->mm); ajFeatTagAdd(sf, NULL, tmp); } embMatMatchDel(&m); } ajStrDel(&seqname); ajStrDel(&seqstr); ajStrDel(&tmp); ajListFree(&list); if (reverse) ajSeqDel(&revseq); return; }
static void cpgreport_cpgsearch(AjPFile outf, ajint from, ajint to, const char *p, const char *name, ajint begin, ajint score, AjPFeattable feattable) { ajint i; ajint c; ajint z; ajint sum; ajint ssum; ajint lsum; ajint t; ajint top; ajint dcg; ajint dgc; ajint gc; char strand = '+'; ajint frame = 0; AjPFeature feature; float score2 = 0.0; if(!cpgreportSource) { ajStrAssignC(&cpgreportSource,"cpgreport"); ajStrAssignC(&cpgreportType,"misc_feature"); } for(i=from,c=to-1,sum=ssum=t=top=0,lsum=-1,z=begin-1;i<to;++i,ssum=sum) { if(p[i]=='C' && p[i+1]=='G' && c-i) sum += score+1; --sum; if(sum<0) sum = 0; if(!sum && ssum) { cpgreport_calcgc(lsum+1,t+2,p,&dcg,&dgc,&gc); if(dgc) { score2 = (float) top; feature = ajFeatNew(feattable, cpgreportSource, cpgreportType, lsum+2+z,t+2+z, score2, strand, frame) ; if(!feature) ajDebug("Error feature not added to feature table"); ajFmtPrintF(outf,"%-20.20s %6d %6d %5d ",name,lsum+2+z, t+2+z,top); ajFmtPrintF(outf," %5d %5.1f %6.2f\n", dcg,(float)gc*100.0/(float)(t+1-lsum), (float)(dcg/dgc)); } else { score2 = (float) top; feature = ajFeatNew(feattable, cpgreportSource, cpgreportType, lsum+2+z,t+2+z, score2, strand, frame) ; ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z, top); ajFmtPrintF(outf," %5d %5.1f -\n", dcg,(float)gc*100.0/(float)(t+1-lsum)); } cpgreport_cpgsearch(outf,t+2,i,p,name,begin,score, feattable); sum = ssum = lsum = t = top = 0; } if(sum>top) { t = i; top = sum; } if(!sum) lsum = i; } if(sum) { cpgreport_calcgc(lsum+1,t+2,p,&dcg,&dgc,&gc); if(dgc) { ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z, top); ajFmtPrintF(outf," %5d %5.1f %6.2f\n", dcg,(float)gc*100.0/(float)(t+1-lsum), ((float)dcg/(float)dgc)); score2 = (float) top; /*score2 = ajFmtPrintS(&score2,"%d.0",top);*/ feature = ajFeatNew(feattable, cpgreportSource, cpgreportType, lsum+2+z,t+2+z, score2, strand, frame); } else { ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z,top); ajFmtPrintF(outf," %5d %5.1f -\n",dcg, (float)gc*100.0/(float)(t+1-lsum)); score2 = (float) top; /*score2 = ajFmtPrintS(&score2,"%d.0",top);*/ feature = ajFeatNew(feattable, cpgreportSource, cpgreportType, lsum+2+z,t+2+z, score2, strand, frame); } cpgreport_cpgsearch(outf,t+2,to,p,name,begin,score,feattable); } return; }