/*
** embPatternRegexSearch
**
** Runs the compiled regular expression held by 'pat' over the active range
** of 'seq' (ajSeqGetBeginTrue .. ajSeqGetEndTrue) and adds one feature to
** 'ftable' for every match found. Overlapping matches are reported because
** the search restarts one character after the start of each match.
**
** ftable   Feature table that receives one feature per match.
** seq      Sequence to search. Nothing is done if its length is zero.
** pat      Regular expression pattern (its compiled form, name and pattern
**          text are all read from it).
** reverse  If true, the reversed copy of the range is searched and matches
**          are written as '-' strand nucleotide features with coordinates
**          mapped back through the range end ('adj').
**
** Each feature carries a tag of the form "*pat <name>: <pattern>".
** The file-scope strings featMotifProt / featMotifNuc are given their
** default values here if they are still empty.
*/
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
                            const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos = 0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr    = NULL;
    AjPStr tmp       = NULL;
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq    = NULL;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);

    /* Nothing to search. Checked before any allocation so that the early
    ** return cannot leak. */
    if(!seqlen)
        return;

    tmp = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    /* For an already-reversed input the length used later by
    ** ajFeatReverse() also includes the sequence offset. */
    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    if(reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
    {
        /* Forward search only. Previously this assignment was
        ** unconditional and overwrote the reversed string built above,
        ** so reverse searches silently scanned the forward strand while
        ** still reporting '-' strand coordinates. */
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);
    }

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
        off = ajRegOffset(patexp);
        len = ajRegLenI(patexp, 0);

        /* An empty match at offset 0 is not reported; the search just
        ** moves on by one character below. */
        if(off || len)
        {
            /* Rebuild seqstr as "matched text + text after the match" so
            ** that it now begins at the match start, and move 'pos' to
            ** the position of that start. */
            ajRegSubI(patexp, 0, &substr);
            ajRegPost(patexp, &tmp);
            ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
            pos += off;

            if(reverse)
            {
                /* Map the match in the reversed string back through the
                ** range end and flag it as '-' strand. */
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               adj - pos - len + 2,
                               adj - pos + 1,
                               0.0, '-', 0);
            }
            else if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
            {
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   pos, pos + len - 1,
                                   0.0);
            }
            else
            {
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               pos, pos + len - 1,
                               0.0, '.', 0);
            }

            if(isreversed)
                ajFeatReverse(sf, seqlen);

            ajFmtPrintS(&tmpstr, "*pat %S: %S",
                        ajPatternRegexGetName(pat),
                        ajPatternRegexGetPattern(pat));
            ajFeatTagAdd(sf, NULL, tmpstr);
        }

        /* Step one character forward (match or not) so that overlapping
        ** matches are found and an empty match cannot loop forever. */
        pos++;
        ajStrCutStart(&seqstr, 1);
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
        ajSeqDel(&revseq);

    return;
}
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, AjBool protein, ajuint mismatches) { AjPPatlistSeq patlist = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPRegexp mismreg = NULL; AjPStr patstr = NULL; AjPStr pat = NULL; ajuint mismatch = 0; ajint ifmt = 0; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "pattern"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistSeqNewType(protein); ifmt = patternSeqFormat(fmt); ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' " "protein: %B mismatches: %d\n", patspec, patname, protein, mismatches); if(ajStrGetCharFirst(patstr) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open pattern file '%S'", patstr); return NULL; } line = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternSeqNewList(patlist,name,pat,mismatches); ajStrSetClear(&pat); } break; default: mismreg = ajRegCompC("<mismatch=(\\d+)>"); while (ajBuffreadLineTrim(infile,&line)) { if (ajStrGetCharFirst(line) == '>') { if (ajStrGetLen(name)) { ajPatternSeqNewList(patlist,name,pat, mismatch); ajStrSetClear(&name); ajStrSetClear(&pat); mismatch=mismatches; } ajStrCutStart(&line,1); if (ajRegExec(mismreg,line)) { ajRegSubI(mismreg,1,&name); ajStrToUint(name,&mismatch); ajStrTruncateLen(&line,ajRegOffset(mismreg)); ajStrTrimWhiteEnd(&line); } ajStrAssignS (&name,line); ajStrAssignEmptyS(&name, patname); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternSeqNewList(patlist,name,pat,mismatch); ajRegFree(&mismreg); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&name, namestr); 
ajPatternSeqNewList(patlist,name,patstr,mismatches); } ajStrDel(&name); ajStrDel(&line); ajStrDel(&pat); ajStrDel(&namestr); ajStrDel(&patstr); return patlist; }
static void stssearch_primTest(void **x,void *cl) { Primer* p; Primer primdata; AjBool testa; AjBool testb; AjBool testc; AjBool testd; ajint ioff; (void) cl; /* make it used */ p = (Primer*) x; primdata = *p; ntests++; if(!(ntests % 1000)) ajDebug("completed tests: %d\n", ntests); testa = ajRegExec(primdata->Prima, seqstr); if(testa) { ioff = ajRegOffset(primdata->Prima); ajDebug("%s: %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Prima); } testb = ajRegExec(primdata->Primb, seqstr); if(testb) { ioff = ajRegOffset(primdata->Primb); ajDebug("%s: %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Primb); } testc = ajRegExec(primdata->Prima, revstr); if(testc) { ioff = ajStrGetLen(seqstr) - ajRegOffset(primdata->Prima); ajDebug("%s: (rev) %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: (rev) %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Prima); } testd = ajRegExec(primdata->Primb, revstr); if(testd) { ioff = ajStrGetLen(seqstr) - ajRegOffset(primdata->Primb); ajDebug("%s: (rev) %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: (rev) %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Primb); } return; }