/* @funcstatic remap_RenamePreferred ******************************************
**
** Translates a list of names through a lookup table. For every name in
** 'list' a newly allocated string is appended to 'newlist': the table value
** when the name is a key of 'table', otherwise a copy of the name itself.
**
** @param [r] list [const AjPList] Names to look up
** @param [r] table [const AjPTable] Table queried with each name
** @param [u] newlist [AjPList] List that receives the resulting strings
** @return [void]
******************************************************************************/

static void remap_RenamePreferred(const AjPList list, const AjPTable table,
                                  AjPList newlist)
{
    AjIList walker = ajListIterNewread(list);
    AjPStr oldname = NULL;

    for(oldname = (AjPStr) ajListIterGet(walker);
        oldname;
        oldname = (AjPStr) ajListIterGet(walker))
    {
        const AjPStr mapped = ajTableFetchS(table, oldname);
        AjPStr outname = ajStrNew();

        if(!mapped)
        {
            /* no entry: carry the original name across unchanged */
            ajDebug("Rename: %S not found\n", oldname);
            ajStrAssignS(&outname, oldname);
        }
        else
        {
            ajDebug("Rename: %S renamed to %S\n", oldname, mapped);
            ajStrAssignS(&outname, mapped);
        }

        ajListstrPushAppend(newlist, outname);
    }

    ajListIterDel(&walker);

    return;
}
/* @funcstatic remap_RemoveMinMax *********************************************
**
** Counts how many matches each enzyme (keyed by the match 'cod' string) has
** in 'restrictlist', recording the counts in 'hittable', then removes and
** deletes every match whose enzyme count is below 'mincuts' or above
** 'maxcuts'.
**
** @param [u] restrictlist [AjPList] Match list, filtered in place
** @param [u] hittable [AjPTable] Receives one count record per enzyme name
** @param [r] mincuts [ajint] Smallest count that is kept
** @param [r] maxcuts [ajint] Largest count that is kept
** @return [void]
******************************************************************************/

static void remap_RemoveMinMax(AjPList restrictlist, AjPTable hittable,
                               ajint mincuts, ajint maxcuts)
{
    AjIList cursor = NULL;
    EmbPMatMatch hit = NULL;
    PValue tally = NULL;
    AjPStr lookup = NULL;
    AjPStr stored = NULL;

    lookup = ajStrNew();

    /* nothing to count or filter when there are no matches */
    if(!ajListGetLength(restrictlist))
    {
        ajStrDel(&lookup);

        return;
    }

    /* pass 1: tally the matches per enzyme */
    cursor = ajListIterNewread(restrictlist);

    for(hit = ajListIterGet(cursor);
        hit != NULL;
        hit = ajListIterGet(cursor))
    {
        ajStrAssignS(&lookup, hit->cod);
        tally = (PValue) ajTableFetchmodS(hittable, lookup);

        if(tally != NULL)
        {
            tally->count++;
            continue;
        }

        /* first sighting: create the record and give the table its own key */
        AJNEW0(tally);
        tally->count = 1;
        tally->iso = ajStrNew();
        ajStrAssignS(&(tally->iso), hit->iso);
        stored = ajStrNew();
        ajStrAssignS(&stored, lookup);
        ajTablePut(hittable, (void *) stored, (void *) tally);
    }

    ajListIterDel(&cursor);

    /* pass 2: drop matches whose enzyme cuts too rarely or too often */
    cursor = ajListIterNew(restrictlist);

    for(hit = ajListIterGet(cursor);
        hit != NULL;
        hit = ajListIterGet(cursor))
    {
        tally = (PValue) ajTableFetchmodS(hittable, hit->cod);

        if(tally->count < mincuts || tally->count > maxcuts)
        {
            ajListIterRemove(cursor);
            embMatMatchDel(&hit);
        }
    }

    ajListIterDel(&cursor);

    ajStrDel(&lookup);

    return;
}
/* @funcstatic primersearch_print_hits ****************************************
**
** Writes a report of every primer in 'primerList' to 'outf': the primer
** name followed by one numbered "Amplimer" section per entry of that
** primer's hit list.
**
** @param [r] primerList [const AjPList] List of Primer records
** @param [u] outf [AjPFile] Report file
** @return [void]
******************************************************************************/

static void primersearch_print_hits(const AjPList primerList, AjPFile outf)
{
    AjIList primerIter = NULL;
    AjIList hitIter = NULL;
    Primer pr = NULL;
    PHit amp = NULL;
    ajint ampNo = 1;

    primerIter = ajListIterNewread(primerList);

    while(!ajListIterDone(primerIter))
    {
        pr = ajListIterGet(primerIter);
        hitIter = ajListIterNewread(pr->hitlist);

        ajFmtPrintF(outf, "\nPrimer name %s\n", ajStrGetPtr(pr->Name));

        /* amplimer numbering restarts at 1 for every primer */
        for(ampNo = 1; !ajListIterDone(hitIter); ampNo++)
        {
            amp = ajListIterGet(hitIter);

            ajFmtPrintF(outf, "Amplimer %d\n", ampNo);
            ajFmtPrintF(outf, "\tSequence: %s %s \n\t%s\n",
                        ajStrGetPtr(amp->seqname),
                        ajStrGetPtr(amp->acc),
                        ajStrGetPtr(amp->desc));
            ajFmtPrintF(outf,
                        "\t%s hits forward strand at %d with %d "
                        "mismatches\n",
                        ajStrGetPtr(amp->forward),
                        amp->forward_pos,
                        amp->forward_mismatch);
            ajFmtPrintF(outf,
                        "\t%s hits reverse strand at [%d] with %d "
                        "mismatches\n",
                        ajStrGetPtr(amp->reverse),
                        amp->reverse_pos,
                        amp->reverse_mismatch);
            ajFmtPrintF(outf, "\tAmplimer length: %d bp\n", amp->amplen);
        }

        ajListIterDel(&hitIter);
    }

    ajListIterDel(&primerIter);

    return;
}
/* @funcstatic splitter_AddSubSeqFeat *****************************************
**
** Copies onto 'ftable' the features of 'oldseq' that are not entirely
** outside the sub-sequence start..end. Each copied feature has 'start'
** subtracted from its Start/End (and from Start2/End2 when those are set)
** and is then passed to ajFeatTrimOffRange with the range 1..end-start+1.
**
** The list iterator is released before the feature table it walks is
** deleted, so the iterator never refers to freed storage.
**
** @param [u] ftable [AjPFeattable] Feature table receiving the copies
** @param [r] start [ajuint] Start of the sub-sequence in 'oldseq'
** @param [r] end [ajuint] End of the sub-sequence in 'oldseq'
** @param [r] oldseq [const AjPSeq] Sequence whose features are copied
** @return [void]
******************************************************************************/

static void splitter_AddSubSeqFeat(AjPFeattable ftable, ajuint start,
                                   ajuint end, const AjPSeq oldseq)
{
    AjPFeattable old_feattable = NULL;
    AjIList iter = NULL;
    AjPFeature gf = NULL;
    AjPFeature copy = NULL;

    old_feattable = ajSeqGetFeatCopy(oldseq);

    if(!old_feattable)
        return;

    iter = ajListIterNewread(old_feattable->Features);

    while(!ajListIterDone(iter))
    {
        gf = ajListIterGet(iter);

        /* skip features lying wholly before or wholly after the range */
        if (((ajFeatGetEnd(gf) < start + 1) &&
             (gf->End2 == 0 || gf->End2 < start + 1)) ||
            ((ajFeatGetStart(gf) > end + 1) &&
             (gf->Start2 == 0 || gf->Start2 > end + 1)))
        {
            continue;
        }

        /* shift the copy into sub-sequence coordinates */
        copy = ajFeatNewFeat(gf);
        copy->Start = copy->Start - start;
        copy->End = copy->End - start;

        if (copy->Start2 > 0)
            copy->Start2 = copy->Start2 - start;

        if (copy->End2 > 0)
            copy->End2 = copy->End2 - start;

        ajFeatTrimOffRange (copy, 0, 1, end - start + 1, AJTRUE, AJTRUE);
        ajFeattableAdd(ftable, copy);
    }

    /* drop the iterator first: it points into old_feattable->Features */
    ajListIterDel(&iter);
    ajFeattableDel(&old_feattable);

    return;
}
/* @funcstatic primersearch_clean_hitlist *************************************
**
** Deletes every match stored in a hit list and then frees the list itself.
**
** The iterator is released before the list it walks is freed, and the list
** is freed exactly once (the previous second ajListFree call was redundant).
** A NULL 'hlist' is ignored.
**
** @param [d] hlist [AjPList*] Address of the list of EmbPMatMatch records
** @return [void]
******************************************************************************/

static void primersearch_clean_hitlist(AjPList* hlist)
{
    AjIList lIter = NULL;
    EmbPMatMatch fm = NULL;

    if(!hlist)
        return;

    lIter = ajListIterNewread(*hlist);

    while(!ajListIterDone(lIter))
    {
        fm = ajListIterGet(lIter);
        embMatMatchDel(&fm);
    }

    /* the iterator refers to *hlist, so it must go before the list does */
    ajListIterDel(&lIter);
    ajListFree(hlist);

    return;
}
static void supermatcher_matchListOrder(void **x,void *cl) { EmbPWordMatch p; AjPList ordered; ajint offset; AjIList listIter; concat *con; concat *c=NULL; p = (EmbPWordMatch)*x; ordered = (AjPList) cl; offset = (*p).seq1start-(*p).seq2start; /* iterate through ordered list to find if it exists already*/ listIter = ajListIterNewread(ordered); while(!ajListIterDone( listIter)) { con = ajListIterGet(listIter); if(con->offset == offset) { /* found so add count and set offset to the new value */ con->offset = offset; con->total+= (*p).length; con->count++; ajListPushAppend(con->list,p); ajListIterDel(&listIter); return; } } ajListIterDel(&listIter); /* not found so add it */ AJNEW(c); c->offset = offset; c->total = (*p).length; c->count = 1; c->list = ajListNew(); ajListPushAppend(c->list,p); ajListPushAppend(ordered, c); return; }
static void primersearch_primer_search(const AjPList primerList, const AjPSeq seq) { AjIList listIter; /* test each list node against this sequence */ listIter = ajListIterNewread(primerList); while(!ajListIterDone(listIter)) { Primer curr_primer = ajListIterGet(listIter); primersearch_scan_seq(curr_primer, seq, AJFALSE); primersearch_scan_seq(curr_primer, seq, AJTRUE); } ajListIterDel(&listIter); return; }
/* @func ajRestermlistClone ***************************************************
**
** Appends a copy (made with ajRestermNewResterm) of every term in 'src' to
** 'dest'. Nothing is copied when 'dest' already has members.
**
** @param [r] src [const AjPList] Source list of resource terms
** @param [u] dest [AjPList] Destination list, expected to be empty
** @return [AjBool] ajFalse if 'dest' was not empty, otherwise ajTrue
******************************************************************************/

AjBool ajRestermlistClone(const AjPList src, AjPList dest)
{
    AjIList walker = NULL;
    AjPResterm original = NULL;

    if(ajListGetLength(dest) != 0)
        return ajFalse;

    walker = ajListIterNewread(src);

    for(;;)
    {
        original = (AjPResterm) ajListIterGet(walker);

        if(!original)
            break;

        ajListPushAppend(dest, ajRestermNewResterm(original));
    }

    ajListIterDel(&walker);

    return ajTrue;
}
/* @func ajResquerylistClone **************************************************
**
** Appends a copy (made with ajResqueryNewResquery) of every query in 'src'
** to 'dest'. Nothing is copied when 'dest' already has members.
**
** @param [r] src [const AjPList] Source list of resource queries
** @param [u] dest [AjPList] Destination list, expected to be empty
** @return [AjBool] ajFalse if 'dest' was not empty, otherwise ajTrue
******************************************************************************/

AjBool ajResquerylistClone(const AjPList src, AjPList dest)
{
    AjIList walker = NULL;
    AjPResquery original = NULL;

    if(ajListGetLength(dest) != 0)
        return ajFalse;

    walker = ajListIterNewread(src);

    for(;;)
    {
        original = (AjPResquery) ajListIterGet(walker);

        if(!original)
            break;

        ajListPushAppend(dest, ajResqueryNewResquery(original));
    }

    ajListIterDel(&walker);

    return ajTrue;
}
static void primersearch_free_primer(void **x, void *cl) { Primer* p; Primer primdata; AjIList lIter; (void) cl; /* make it used */ p = (Primer*) x; primdata = *p; primersearch_free_pguts(&primdata->forward); primersearch_free_pguts(&primdata->reverse); ajStrDel(&primdata->Name); /* clean up hitlist */ lIter = ajListIterNewread(primdata->hitlist); while(!ajListIterDone(lIter)) { PHit phit = ajListIterGet(lIter); ajStrDel(&phit->forward); ajStrDel(&phit->reverse); ajStrDel(&phit->seqname); ajStrDel(&phit->acc); ajStrDel(&phit->desc); AJFREE(phit); } ajListFree(&primdata->hitlist); ajListFree(&primdata->hitlist); ajListIterDel(&lIter); AJFREE(primdata); return; }
int main(int argc, char **argv) { AjPList list = NULL; AjPSeq seq; AjPSeq seq2; AjPStr aa0str = 0; AjPStr aa1str = 0; const char *s1; const char *s2; char *strret = NULL; ajuint i; ajuint j; ajuint k; ajint l; ajint abovethresh; ajint total; ajint starti = 0; ajint startj = 0; ajint windowsize; float thresh; AjPGraph graph = NULL; AjPGraph xygraph = NULL; float flen1; float flen2; ajuint len1; ajuint len2; AjPTime ajtime = NULL; time_t tim; AjBool boxit=AJTRUE; /* Different ticks as they need to be different for x and y due to length of string being important on x */ ajuint acceptableticksx[]= { 1,10,50,100,500,1000,1500,10000, 500000,1000000,5000000 }; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,2000,5000,10000,15000, 500000,1000000,5000000 }; ajint numbofticks = 10; float xmargin; float ymargin; float ticklen; float tickgap; float onefifth; float k2; float max; char ptr[10]; AjPMatrix matrix = NULL; ajint** sub; AjPSeqCvt cvt; AjPStr subt = NULL; ajint b1; ajint b2; ajint e1; ajint e2; AjPStr se1; AjPStr se2; ajint ithresh; AjBool stretch; PPoint ppt = NULL; float xa[1]; float ya[1]; AjPGraphdata gdata=NULL; AjPStr tit = NULL; AjIList iter = NULL; float x1 = 0.; float x2 = 0.; float y1 = 0.; float y2 = 0.; ajuint tui; se1 = ajStrNew(); se2 = ajStrNew(); embInit("dotmatcher", argc, argv); seq = ajAcdGetSeq("asequence"); seq2 = ajAcdGetSeq("bsequence"); stretch = ajAcdGetToggle("stretch"); graph = ajAcdGetGraph("graph"); xygraph = ajAcdGetGraphxy("xygraph"); windowsize = ajAcdGetInt("windowsize"); ithresh = ajAcdGetInt("threshold"); matrix = ajAcdGetMatrix("matrixfile"); sub = ajMatrixGetMatrix(matrix); cvt = ajMatrixGetCvt(matrix); thresh = (float)ithresh; ajtime = ajTimeNew(); tim = time(0); ajTimeSetLocal(ajtime, tim); b1 = ajSeqGetBegin(seq); b2 = ajSeqGetBegin(seq2); e1 = ajSeqGetEnd(seq); e2 = ajSeqGetEnd(seq2); len1 = ajSeqGetLen(seq); len2 = ajSeqGetLen(seq2); tui = ajSeqGetLen(seq); flen1 = (float) tui; tui = ajSeqGetLen(seq2); flen2 = (float) 
tui; ajStrAssignSubC(&se1,ajSeqGetSeqC(seq),b1-1,e1-1); ajStrAssignSubC(&se2,ajSeqGetSeqC(seq2),b2-1,e2-1); ajSeqAssignSeqS(seq,se1); ajSeqAssignSeqS(seq2,se2); s1 = ajStrGetPtr(ajSeqGetSeqS(seq)); s2 = ajStrGetPtr(ajSeqGetSeqS(seq2)); aa0str = ajStrNewRes(1+len1); /* length plus trailing blank */ aa1str = ajStrNewRes(1+len2); list = ajListNew(); for(i=0;i<len1;i++) ajStrAppendK(&aa0str,(char)ajSeqcvtGetCodeK(cvt, *s1++)); for(i=0;i<len2;i++) ajStrAppendK(&aa1str,(char)ajSeqcvtGetCodeK(cvt, *s2++)); max = (float)len1; if(len2 > max) max = (float) len2; xmargin = ymargin = max *(float)0.15; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; subt = ajStrNewC((strret= ajFmtString("(windowsize = %d, threshold = %3.2f %D)", windowsize,thresh,ajtime))); if(!stretch) { if( ajStrGetLen(ajGraphGetSubtitleS(graph)) <=1) ajGraphSetSubtitleS(graph,subt); ajGraphOpenWin(graph, (float)0.0-ymargin,(max*(float)1.35)+ymargin, (float)0.0-xmargin,(float)max+xmargin); ajGraphicsDrawposTextAtmid(flen1*(float)0.5, (float)0.0-(xmargin/(float)2.0), ajGraphGetXlabelC(graph)); ajGraphicsDrawposTextAtlineJustify((float)0.0-(xmargin*(float)0.75), flen2*(float)0.5, (float)0.0-(xmargin*(float)0.75),flen1, ajGraphGetYlabelC(graph),0.5); ajGraphicsSetCharscale(0.5); } s1= ajStrGetPtr(aa0str); s2 = ajStrGetPtr(aa1str); for(j=0; (ajint)j < (ajint)len2-windowsize;j++) { i =0; total = 0; abovethresh =0; k = j; for(l=0;l<windowsize;l++) total = total + sub[(ajint)s1[i++]][(ajint)s2[k++]]; if(total >= thresh) { abovethresh=1; starti = i-windowsize; startj = k-windowsize; } while(i < len1 && k < len2) { total = total - sub[(ajint)s1[i-windowsize]] [(ajint)s2[k-windowsize]]; total = total + sub[(ajint)s1[i]][(ajint)s2[k]]; if(abovethresh) { if(total < thresh) { abovethresh = 0; /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)i-1,(float)k-1,stretch); } } else if(total >= thresh) { starti = i-windowsize; startj = k-windowsize; abovethresh= 1; } i++; k++; } 
if(abovethresh) /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)i-1,(float)k-1, stretch); } for(i=0; (ajint)i < (ajint)len1-windowsize;i++) { j = 0; total = 0; abovethresh =0; k = i; for(l=0;l<windowsize;l++) total = total + sub[(ajint)s1[k++]][(ajint)s2[j++]]; if(total >= thresh) { abovethresh=1; starti = k-windowsize; startj = j-windowsize; } while(k < len1 && j < len2) { total = total - sub[(ajint)s1[k-windowsize]] [(ajint)s2[j-windowsize]]; total = total + sub[(ajint)s1[k]][(ajint)s2[j]]; if(abovethresh) { if(total < thresh) { abovethresh = 0; /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)k-1,(float)j-1,stretch); } } else if(total >= thresh) { starti = k-windowsize; startj = j-windowsize; abovethresh= 1; } j++; k++; } if(abovethresh) /* draw the line */ dotmatcher_pushpoint(&list,(float)starti,(float)startj, (float)k-1,(float)j-1, stretch); } if(boxit && !stretch) { ajGraphicsDrawposRect(0.0,0.0,flen1, flen2); i=0; while(acceptableticksx[i]*numbofticks < len1) i++; if(i<=13) tickgap = (float)acceptableticksx[i]; else tickgap = (float)acceptableticksx[10]; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; if(len2/len1 > 10 ) { /* if a lot smaller then just label start and end */ ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0,(float)0.0-ticklen); sprintf(ptr,"%d",b1-1); ajGraphicsDrawposTextAtmid((float)0.0,(float)0.0-(onefifth),ptr); ajGraphicsDrawposLine(flen1,(float)0.0, flen1,(float)0.0-ticklen); sprintf(ptr,"%d",len1+b1-1); ajGraphicsDrawposTextAtmid(flen1,(float)0.0-(onefifth),ptr); } else for(k2=0.0;k2<len1;k2+=tickgap) { ajGraphicsDrawposLine(k2,(float)0.0,k2,(float)0.0-ticklen); sprintf(ptr,"%d",(ajint)k2+b1-1); ajGraphicsDrawposTextAtmid(k2,(float)0.0-(onefifth),ptr); } i = 0; while(acceptableticks[i]*numbofticks < len2) i++; tickgap = (float)acceptableticks[i]; ticklen = ymargin*(float)0.01; onefifth = ymargin*(float)0.02; if(len1/len2 > 10 ) { /* if a lot 
smaller then just label start and end */ ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0-ticklen,(float)0.0); sprintf(ptr,"%d",b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),(float)0.0,ptr); ajGraphicsDrawposLine((float)0.0,flen2,(float)0.0-ticklen, flen2); sprintf(ptr,"%d",len2+b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),flen2,ptr); } else for(k2=0.0;k2<len2;k2+=tickgap) { ajGraphicsDrawposLine((float)0.0,k2,(float)0.0-ticklen,k2); sprintf(ptr,"%d",(ajint)k2+b2-1); ajGraphicsDrawposTextAtend((float)0.0-(onefifth),k2,ptr); } } if(!stretch) ajGraphicsClose(); else /* the xy graph for -stretch */ { tit = ajStrNew(); ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(xygraph)); gdata = ajGraphdataNewI(1); xa[0] = (float)b1; ya[0] = (float)b2; ajGraphSetTitleC(xygraph,ajStrGetPtr(tit)); ajGraphSetXlabelC(xygraph,ajSeqGetNameC(seq)); ajGraphSetYlabelC(xygraph,ajSeqGetNameC(seq2)); ajGraphdataSetTypeC(gdata,"2D Plot Float"); ajGraphdataSetTitleS(gdata,subt); ajGraphdataSetMinmax(gdata,(float)b1,(float)e1,(float)b2, (float)e2); ajGraphdataSetTruescale(gdata,(float)b1,(float)e1,(float)b2, (float)e2); ajGraphxySetXstartF(xygraph,(float)b1); ajGraphxySetXendF(xygraph,(float)e1); ajGraphxySetYstartF(xygraph,(float)b2); ajGraphxySetYendF(xygraph,(float)e2); ajGraphxySetXrangeII(xygraph,b1,e1); ajGraphxySetYrangeII(xygraph,b2,e2); if(list) { iter = ajListIterNewread(list); while((ppt = ajListIterGet(iter))) { x1 = ppt->x1+b1-1; y1 = ppt->y1+b2-1; x2 = ppt->x2+b1-1; y2 = ppt->y2+b2-1; ajGraphAddLine(xygraph,x1,y1,x2,y2,0); AJFREE(ppt); } ajListIterDel(&iter); } ajGraphdataAddXY(gdata,xa,ya); ajGraphDataReplace(xygraph,gdata); ajGraphxyDisplay(xygraph,ajFalse); ajGraphicsClose(); ajStrDel(&tit); } ajListFree(&list); ajSeqDel(&seq); ajSeqDel(&seq2); ajGraphxyDel(&graph); ajGraphxyDel(&xygraph); ajMatrixDel(&matrix); ajTimeDel(&ajtime); /* deallocate memory */ ajStrDel(&aa0str); ajStrDel(&aa1str); ajStrDel(&se1); ajStrDel(&se2); ajStrDel(&subt); AJFREE(strret); /* 
created withing ajFmtString */ embExit(); return 0; }
static void extractfeat_FeatSeqExtract(const AjPSeq seq, AjPSeqout seqout, AjPFeattable featab, ajint before, ajint after, AjBool join, AjBool featinname, const AjPStr describe) { AjIList iter = NULL; AjPFeature gf = NULL; AjBool single; /* ajtrue = is not a multiple */ AjBool parent; /* ajtrue = is a parent of a multiple */ AjBool child; /* ajTrue = is a child of a multiple */ AjBool compall; /* ajTrue = reverse comp all of join */ AjBool sense; /* ajTrue = forward sense */ AjBool remote; /* ajTrue = remote ID */ AjPStr type = NULL; /* name of feature */ AjPStr featseq = NULL; /* feature sequence string */ AjPStr tmpseq = NULL; /* temporary sequence string */ ajint firstpos; ajint lastpos; /* bounds of feature in sequence */ AjPStr describeout = NULL; /* tag names/values to add to descriptions */ ajuint count = 0; /* For all features... */ if(featab && ajFeattableGetSize(featab)) { /* initialise details of a feature */ featseq = ajStrNew(); tmpseq = ajStrNew(); type = ajStrNew(); remote = ajFalse; compall = ajFalse; sense = ajTrue; firstpos = 0; lastpos = 0; describeout = ajStrNew(); iter = ajListIterNewread(featab->Features); while(!ajListIterDone(iter)) { gf = ajListIterGet(iter) ; /* ** Determine what sort of thing this feature is. Only one of ** these will be true. 
** True if this is part of a multiple join and it is not ** the parent */ child = ajFalse; /* True if this is part of a multiple join and it is the parent */ parent = ajFalse; /* True if this is not part of a multiple join */ single = ajFalse; if(ajFeatIsMultiple(gf)) { if(ajFeatIsChild(gf)) child = ajTrue; else parent = ajTrue; } else single = ajTrue; /* ** If not wish to assembling joins(), then force all features ** to be treated as single */ if(!join) { child = ajFalse; parent = ajFalse; single = ajTrue; } ajDebug("feature %S %d-%d is parent %B, child %B, single %B\n", ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf), parent, child, single); /* ajUser("feature %S %d-%d is parent %B, child %B, single %B", ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf), parent, child, single); */ /* ** If single or parent, write out any stored previous feature ** sequence */ if(count++ && !child) { extractfeat_WriteOut(seqout, &featseq, compall, sense, firstpos, lastpos, before, after, seq, remote, type, featinname, describeout); /* reset joined feature information */ ajStrSetClear(&featseq); ajStrSetClear(&tmpseq); ajStrSetClear(&type); ajStrSetClear(&describeout); remote = ajFalse; compall = ajFalse; sense = ajTrue; firstpos = 0; lastpos = 0; } /* if parent, note if have Complemented Join */ if(parent) compall = ajFeatIsCompMult(gf); /* ** Get the sense of the feature ** NB. if complementing several joined features, then pretend they ** are forward sense until its possible to reverse-complement ** them all together. 
*/ if(!compall && ajFeatGetStrand(gf) == '-') sense = ajFalse; /* get 'type' name of feature */ if(single || parent) ajStrAssignS(&type, ajFeatGetType(gf)); /* ** if single or parent, get 'before' + 'after' sequence ** positions */ if(single || parent) { firstpos = ajFeatGetStart(gf)-1; lastpos = ajFeatGetEnd(gf)-1; } /* if child, update the boundary positions */ if(child) { if(sense) lastpos = ajFeatGetEnd(gf)-1; else firstpos = ajFeatGetStart(gf)-1; } extractfeat_MatchPatternDescribe(gf, describe, &describeout); /* get feature sequence(complement if required) */ if(!child) { if(join) ajFeatGetSeqJoin(gf, featab, seq, &tmpseq); else ajFeatGetSeq(gf, seq, &tmpseq); ajDebug("extracted feature = %d bases\n", ajStrGetLen(tmpseq)); /*ajUser("extracted feature = %d bases", ajStrGetLen(tmpseq));*/ ajStrAssignS(&featseq, tmpseq); } } ajListIterDel(&iter) ; /* ** write out any previous sequence(s) ** - add before + after, complement all */ extractfeat_WriteOut(seqout, &featseq, compall, sense, firstpos, lastpos, before, after, seq, remote, type, featinname, describeout); ajStrDel(&featseq); ajStrDel(&tmpseq); ajStrDel(&type); ajStrDel(&describeout); } return; }
/* @funcstatic assemoutWriteNextBam *******************************************
**
** Writes assembly data in BAM format.
**
** When assem->Hasdata is not set, a header is built and written first: an
** @HD line, one @SQ line per contig not named "*" (each followed by @CO
** lines for the contig tags) and the read-group lines returned by
** assemSAMGetReadgroupHeaderlines. The BGZF handle is created here and
** stored in outfile->OutData. If assem->BamHeader is not set the function
** returns after the header.
**
** Otherwise every read of the assembly is written with
** assemoutWriteBamAlignment using the handle kept in outfile->OutData.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem [const AjPAssem] Assembly to write
** @return [AjBool] ajFalse when the file or the assembly is missing,
**                  otherwise ajTrue
******************************************************************************/

static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile bamfile = ajOutfileGetFile(outfile);
    AjPSeqBamHeader hdr = NULL;
    AjPAssemContig contig = NULL;
    AjPSeqBam record;
    AjPAssemRead read = NULL;
    AjPAssemContig* contigs = NULL;
    AjPAssemTag tag = NULL;
    AjIList iter = NULL;
    AjPSeqBamBgzf bgzf = NULL;
    AjPStr hdrtext = NULL;
    const AjPStr rglines = NULL;
    AjBool ret = ajTrue;
    ajint idx = 0;
    ajulong ncontigs = 0UL;

    if(!bamfile || !assem)
        return ajFalse;

    if(!assem->Hasdata)
    {
        /* prefer the explicit contig order when one is recorded */
        if(ajListGetLength(assem->ContigsOrder))
            ncontigs = ajListToarray(assem->ContigsOrder,
                                     (void***)&contigs);
        else
            ncontigs = ajTableToarrayValues(assem->Contigs,
                                            (void***)&contigs);

        ajFmtPrintS(&hdrtext, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));
        hdr = ajSeqBamHeaderNewN((ajuint) ncontigs);

        bgzf = ajSeqBamBgzfNew(ajFileGetFileptr(bamfile), "w");
        outfile->OutData = bgzf;

        for(idx = 0; contigs[idx]; idx++)   /* contigs */
        {
            contig = contigs[idx];

            /* the "*" contig gets no header entry */
            if(ajStrMatchC(contig->Name, "*"))
                continue;

            hdr->target_name[idx] = strdup(ajStrGetPtr(contig->Name));
            hdr->target_len[idx]  = contig->Length;

            ajFmtPrintAppS(&hdrtext, "@SQ\tSN:%S\tLN:%d",
                           contig->Name, contig->Length);

            if(contig->URI)
                ajFmtPrintAppS(&hdrtext, "\tUR:%S", contig->URI);

            if(contig->MD5)
                ajFmtPrintAppS(&hdrtext, "\tM5:%S", contig->MD5);

            if(contig->Species)
                ajFmtPrintAppS(&hdrtext, "\tSP:%S", contig->Species);

            ajFmtPrintAppS(&hdrtext, "\n");

            iter = ajListIterNewread(contig->Tags);

            while(!ajListIterDone(iter))
            {
                tag = ajListIterGet(iter);
                ajFmtPrintAppS(&hdrtext, "@CO\t%S %u %u %S\n",
                               tag->Name, tag->x1, tag->y1, tag->Comment);
            }

            ajListIterDel(&iter);
        }

        rglines = assemSAMGetReadgroupHeaderlines(assem);

        if(rglines)
            ajStrAppendS(&hdrtext, rglines);

        ajSeqBamHeaderSetTextC(hdr, ajStrGetPtr(hdrtext));
        ajSeqBamHeaderWrite(bgzf, hdr);

        ajSeqBamHeaderDel(&hdr);
        ajStrDel(&hdrtext);
        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */
    bgzf = outfile->OutData;

    /* one reusable record whose data buffer starts at 10 bytes */
    AJNEW0(record);
    record->m_data = 10;
    AJCNEW0(record->data, record->m_data);

    iter = ajListIterNewread(assem->Reads);

    while(!ajListIterDone(iter))   /* reads */
    {
        read = ajListIterGet(iter);
        assemoutWriteBamAlignment(bgzf, read, record);
    }

    ajListIterDel(&iter);

    AJFREE(record->data);
    AJFREE(record);

    /* the BGZF handle is deliberately left open here */

    return ret;
}
void ajResourceTrace(const AjPResource thys) { AjIList iter; AjPStr tmpstr; ajuint i; AjPReslink reslink = NULL; AjPResquery resqry = NULL; AjPResterm resterm = NULL; if(!thys) { ajDebug("ajResourceTrace NULL\n"); return; } ajDebug("\najResourceTrace\n"); ajDebug(" Id: %S\n", thys->Id); ajDebug(" Acc: %S\n", thys->Acc); ajDebug(" Name: %S\n", thys->Name); ajDebug(" Desc: %S\n", thys->Desc); ajDebug(" Url: %S\n", thys->Url); ajDebug(" Idalt: %Lu\n", ajListGetLength(thys->Idalt)); if(ajListGetLength(thys->Idalt)) { i=0; iter = ajListIterNewread(thys->Idalt); while(!ajListIterDone(iter)) { tmpstr = ajListIterGet(iter); ajDebug("%15d: %S\n", ++i, tmpstr); } ajListIterDel(&iter); } ajDebug(" Cat: %Lu\n", ajListGetLength(thys->Cat)); if(ajListGetLength(thys->Cat)) { i = 0; iter = ajListIterNewread(thys->Cat); while(!ajListIterDone(iter)) { tmpstr = ajListIterGet(iter); ajDebug("%15d: %S\n", ++i, tmpstr); } ajListIterDel(&iter); } ajDebug(" Taxon: %Lu\n", ajListGetLength(thys->Taxon)); if(ajListGetLength(thys->Taxon)) { i = 0; iter = ajListIterNewread(thys->Taxon); while(!ajListIterDone(iter)) { resterm = ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name); } ajListIterDel(&iter); } ajDebug(" Edamtpc: %Lu\n", ajListGetLength(thys->Edamtpc)); if(ajListGetLength(thys->Edamtpc)) { i = 0; iter = ajListIterNewread(thys->Edamtpc); while(!ajListIterDone(iter)) { resterm = ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name); } ajListIterDel(&iter); } ajDebug(" Edamdat: %Lu\n", ajListGetLength(thys->Edamdat)); if(ajListGetLength(thys->Edamdat)) { i = 0; iter = ajListIterNewread(thys->Edamdat); while(!ajListIterDone(iter)) { resterm = ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name); } ajListIterDel(&iter); } ajDebug(" Edamid: %Lu\n", ajListGetLength(thys->Edamid)); if(ajListGetLength(thys->Edamid)) { i = 0; iter = ajListIterNewread(thys->Edamid); while(!ajListIterDone(iter)) { resterm = 
ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name); } ajListIterDel(&iter); } ajDebug(" Edamfmt: %Lu\n", ajListGetLength(thys->Edamfmt)); if(ajListGetLength(thys->Edamfmt)) { i = 0; iter = ajListIterNewread(thys->Edamfmt); while(!ajListIterDone(iter)) { resterm = ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name); } ajListIterDel(&iter); } ajDebug(" Xref: %Lu\n", ajListGetLength(thys->Xref)); if(ajListGetLength(thys->Xref)) { i = 0; iter = ajListIterNewread(thys->Xref); while(!ajListIterDone(iter)) { reslink = ajListIterGet(iter); ajDebug("%15d: %S | %S\n", ++i, reslink->Source, reslink->Term); } ajListIterDel(&iter); } ajDebug(" Query: %Lu\n", ajListGetLength(thys->Query)); if(ajListGetLength(thys->Query)) { i = 0; iter = ajListIterNewread(thys->Query); while(!ajListIterDone(iter)) { resqry = ajListIterGet(iter); ajDebug("%15d: %S | %S | %S | %S\n", ++i, resqry->Datatype, resqry->Format, resqry->Term, resqry->Url); } ajListIterDel(&iter); } ajDebug(" Example: %Lu\n", ajListGetLength(thys->Example)); if(ajListGetLength(thys->Example)) { i = 0; iter = ajListIterNewread(thys->Example); while(!ajListIterDone(iter)) { tmpstr = ajListIterGet(iter); ajDebug("%15d: %S\n", ++i, tmpstr); } ajListIterDel(&iter); } return; }
/* @funcstatic assemoutWriteNextSam *******************************************
**
** Writes assembly data in SAM format.
**
** When assem->Hasdata is not set, the header is written first: an @HD line,
** a @PG program record, one @SQ line per contig not named "*" (each
** followed by @CO lines for the contig tags) and the read-group lines
** returned by assemSAMGetReadgroupHeaderlines. If assem->BamHeader is not
** set the function returns after the header.
**
** Otherwise every read of the assembly is written with
** assemoutWriteSamAlignment, which is given the contig array and its size.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem [const AjPAssem] Assembly to write
** @return [AjBool] ajFalse when the file or the assembly is missing,
**                  otherwise ajTrue
******************************************************************************/

static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile samfile = ajOutfileGetFile(outfile);
    AjPAssemContig contig = NULL;
    AjPAssemRead read = NULL;
    AjPAssemTag tag = NULL;
    AjPAssemContig* contigs = NULL;
    AjIList iter = NULL;
    AjPStr cmdline = NULL;
    const AjPStr rglines = NULL;
    ajint n = 0;
    ajulong idx = 0UL;
    AjBool ret = ajTrue;

    if(!samfile)
        return ajFalse;

    if(!assem)
        return ajFalse;

    /* NOTE(review): n is never updated, so this always reports 0 */
    ajDebug("assemoutWriteSam: # of contigs = %d\n", n);

    if(!assem->Hasdata)
    {
        ajFmtPrintF(samfile, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));

        /* Program record: command line with newlines turned into spaces */
        cmdline = ajStrNewS(ajUtilGetCmdline());
        ajStrExchangeKK(&cmdline, '\n', ' ');
        ajFmtPrintF(samfile, "@PG\tID:%S\tVN:%S\tCL:%S\n",
                    ajUtilGetProgram(), ajNamValueVersion(), cmdline);
        ajStrDel(&cmdline);

        /* prefer the explicit contig order when one is recorded */
        if(ajListGetLength(assem->ContigsOrder))
            ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        for(idx = 0UL; contigs[idx]; idx++)   /* contigs */
        {
            contig = contigs[idx];

            /* the "*" contig gets no header line */
            if(ajStrMatchC(contig->Name, "*"))
                continue;

            ajFmtPrintF(samfile, "@SQ\tSN:%S\tLN:%d",
                        contig->Name, contig->Length);

            if(contig->URI)
                ajFmtPrintF(samfile, "\tUR:%S", contig->URI);

            if(contig->MD5)
                ajFmtPrintF(samfile, "\tM5:%S", contig->MD5);

            if(contig->Species)
                ajFmtPrintF(samfile, "\tSP:%S", contig->Species);

            ajFmtPrintF(samfile, "\n");

            iter = ajListIterNewread(contig->Tags);

            while(!ajListIterDone(iter))
            {
                tag = ajListIterGet(iter);
                ajFmtPrintF(samfile, "@CO\t%S %u %u %S\n",
                            tag->Name, tag->x1, tag->y1, tag->Comment);
            }

            ajListIterDel(&iter);
        }

        rglines = assemSAMGetReadgroupHeaderlines(assem);

        if(rglines)
            ajFmtPrintF(samfile,"%S", rglines);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */
    iter = ajListIterNewread(assem->Reads);

    /* the alignment writer is given the contig array and its size */
    if(ajListGetLength(assem->ContigsOrder))
        idx = ajListToarray(assem->ContigsOrder, (void***)&contigs);
    else
        idx = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

    while(!ajListIterDone(iter))   /* reads */
    {
        read = ajListIterGet(iter);
        assemoutWriteSamAlignment(samfile, read, contigs, (ajuint) idx);
    }

    ajListIterDel(&iter);
    AJFREE(contigs);

    return ret;
}
/* @funcstatic assemoutWriteNextMaf *******************************************
**
** Writes assembly data in MAF (MIRA assembly format) style records.
**
** When assem->Hasdata is not set, one contig record (CO, NR, LC, CT tags,
** CS, CQ and a terminating "\\" line) is written per entry of the contig
** table and the function returns.
**
** Otherwise one read record is written per read of the assembly; when
** there are no reads the "//" and "EC" closing lines are written instead.
**
** Fixes relative to the previous version:
**  - the "different reference" error now passes the two values its format
**    string asks for (they were missing, which is undefined behaviour);
**  - ClipLeft/ClipRight are written with the CL/CR tags instead of reusing
**    the SL/SR tags already used for VectorLeft/VectorRight;
**  - a no-op deletion of an iterator that is never created on the data path
**    was removed.
**
** @param [u] outfile [AjPOutfile] Output file
** @param [r] assem [const AjPAssem] Assembly to write
** @return [AjBool] ajFalse when the file or the assembly is missing,
**                  otherwise ajTrue
******************************************************************************/

static AjBool assemoutWriteNextMaf(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPAssemContig c = NULL;
    AjPAssemRead r = NULL;
    AjPAssemTag t = NULL;
    ajint i = 0;
    ajint nreads = 0;
    AjIList j = NULL;
    AjIList k = NULL;
    AjIList reads = NULL;
    AjPAssemContig* contigs = NULL;

    if(!outf)
        return ajFalse;

    if(!assem)
        return ajFalse;

    if(!assem->Hasdata)
    {
        ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        for (;contigs[i];i++)
        {
            c = contigs[i];

            ajFmtPrintF(outf, "CO\t%S\n", c->Name);
            ajFmtPrintF(outf, "NR\t%d\n", c->Nreads);
            ajFmtPrintF(outf, "LC\t%d\n", c->Length);

            j = ajListIterNewread(c->Tags);

            while (!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintF(outf, "CT\t%S %u %u %S\n",
                            t->Name, t->x1, t->y1, t->Comment);
            }

            ajListIterDel(&j);

            ajFmtPrintF(outf, "CS\t%S\n", c->Consensus);
            ajFmtPrintF(outf, "CQ\t%S\n", c->ConsensusQ);

            ajFmtPrintF(outf, "\\\\\n");
        }

        AJFREE(contigs);

        return ajTrue;
    }

    /* data */
    reads = ajListIterNewread(assem->Reads);

    while (!ajListIterDone(reads))
    {
        nreads++;
        r = ajListIterGet(reads);

        /* i is still 0 here: the header branch above always returns */
        if(r->Reference != i)
            ajErr("different reference/contig number(%d) than expected(%d)"
                  "\nreads were expected to be sorted by contigs",
                  (ajint) r->Reference, i);

        ajFmtPrintF(outf, "RD\t%S\n", r->Name);
        ajFmtPrintF(outf, "RS\t%S\n", r->Seq);
        ajFmtPrintF(outf, "RQ\t%S\n", r->SeqQ);
        ajFmtPrintF(outf, "TN\t%S\n", r->Template);

        if(r->Direction)
            ajFmtPrintF(outf, "DI\t%c\n", r->Direction);

        if(r->TemplateSizeMin)
            ajFmtPrintF(outf, "TF\t%d\n", r->TemplateSizeMin);

        if(r->TemplateSizeMax)
            ajFmtPrintF(outf, "TT\t%d\n", r->TemplateSizeMax);

        if(r->File)
            ajFmtPrintF(outf, "SF\t%S\n", r->File);

        /* sequencing-vector clips */
        if(r->VectorLeft)
            ajFmtPrintF(outf, "SL\t%d\n", r->VectorLeft);

        if(r->VectorRight)
            ajFmtPrintF(outf, "SR\t%d\n", r->VectorRight);

        /* quality clips */
        if(r->QualLeft)
            ajFmtPrintF(outf, "QL\t%d\n", r->QualLeft);

        if(r->QualRight)
            ajFmtPrintF(outf, "QR\t%d\n", r->QualRight);

        /* other clips: CL/CR, distinct from the vector clips above */
        if(r->ClipLeft)
            ajFmtPrintF(outf, "CL\t%d\n", r->ClipLeft);

        if(r->ClipRight)
            ajFmtPrintF(outf, "CR\t%d\n", r->ClipRight);

        k = ajListIterNewread(r->Tags);

        while (!ajListIterDone(k))
        {
            t = ajListIterGet(k);
            ajFmtPrintF(outf, "RT\t%S %u %u\n",
                        t->Name, t->x1, t->y1);
        }

        ajListIterDel(&k);

        ajFmtPrintF(outf, "ST\t%S\n", r->Technology);
        ajFmtPrintF(outf, "ER\n");
        ajFmtPrintF(outf, "AT\t%u %u %u %u\n",
                    r->x1, r->y1, r->x2, r->y2);
    }

    ajListIterDel(&reads);

    if(!nreads)
    {
        ajFmtPrintF(outf, "//\n");
        ajFmtPrintF(outf, "EC\n");
    }

    return ajTrue;
}
/* @funcstatic assemoutWriteBamAlignment **************************************
**
** Packs one read into the BAM record 'bam' and writes it with
** ajSeqBamWrite. The record data is laid out as: read name (with its
** terminating NUL), CIGAR operations (one 32-bit value each), the sequence
** packed two bases per byte, the quality values (stored minus 33, or 0xff
** for every base when no quality string is available) and then the
** optional tags appended by ajSeqBamAuxAppend.
**
** Fixes relative to the previous version:
**  - sequence characters index the base-code table through unsigned char,
**    so a negative 'char' value can no longer index outside the table;
**  - the <ctype.h> classifiers receive unsigned char values;
**  - the base-code table is a static constant instead of being rebuilt on
**    the stack for every read.
**
** @param [u] gzfile [AjPSeqBamBgzf] BGZF output handle
** @param [r] r [const AjPAssemRead] Read to write
** @param [u] bam [AjPSeqBam] Reusable BAM record; its data buffer is grown
**                            when the read needs more room
** @return [AjBool] Always ajTrue
******************************************************************************/

static AjBool assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,
                                        const AjPAssemRead r,
                                        AjPSeqBam bam)
{
    AjPSeqBamCore c;
    AjPAssemTag tag;
    unsigned char *dpos;
    const char *s;
    ajuint ilen;
    ajuint slen;
    ajuint i;
    AjIList l = NULL;

    /* optional fields */
    ajint tagvalsize = 0;
    const unsigned char* tagval = 0;
    ajint intval =0;

    /* processing cigar strings*/
    char *t;
    int op;
    long x;

    /* 4-bit base code for every character value; 15 for anything unknown */
    static const unsigned char bam_nt16_table[256] =
    {
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
         1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
        15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
        15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
        15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
        15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
        15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
    };

    /* bam_write1 for each alignment */

    c = &bam->core;

    ilen = ajStrGetLen(r->Seq);

    c->tid = (int) r->Reference;

    if(r->Flag & BAM_FREVERSE)
        c->pos = r->y1-1;
    else
        c->pos = r->x1-1; /* BAM format is zero based;
                             -1 is translated to 0, meaning unmapped */
    c->bin = 0;
    c->qual = r->MapQ;
    c->l_qname = 1 + ajStrGetLen(r->Name);
    c->flag = r->Flag;
    c->n_cigar = 0;
    c->l_qseq = ilen;
    c->mtid = (int) r->Rnext;
    c->mpos = (int) r->Pnext-1;
    c->isize = r->Tlen;

    /* get cigar string length: one operation per letter or '=' */
    s = ajStrGetPtr(r->Cigar);

    if (strcmp(s,"*")) /* '*' means unavailable */
    {
        for (; *s; ++s)
        {
            if ((isalpha((unsigned char)*s)) || (*s=='='))
                ++c->n_cigar;
            else if (!isdigit((unsigned char)*s))
                ajWarn("invalid CIGAR character: %c\n", *s);
        }
    }

    bam->data_len = c->n_cigar*4 + c->l_qname + (ilen + 1)/2 + ilen;

    /* allocation for optional tags are made as they are appended */
    if(bam->data_len > bam->m_data)
    {
        AJCRESIZE0(bam->data,bam->m_data, bam->data_len);
        bam->m_data = bam->data_len;
    }

    dpos = bam->data;

    /* copy query name to bam->data */
    memcpy(dpos, ajStrGetPtr(r->Name), c->l_qname);
    dpos += c->l_qname;

    /* copy cigar string to bam->data */
    s = ajStrGetPtr(r->Cigar);

    for (i = 0; i != c->n_cigar; ++i)
    {
        x = strtol(s, &t, 10);
        op = toupper((unsigned char)*t);

        if (op == 'M') op = BAM_CMATCH;
        else if (op == 'I') op = BAM_CINS;
        else if (op == 'D') op = BAM_CDEL;
        else if (op == 'N') op = BAM_CREF_SKIP;
        else if (op == 'S') op = BAM_CSOFT_CLIP;
        else if (op == 'H') op = BAM_CHARD_CLIP;
        else if (op == 'P') op = BAM_CPAD;
        else if (op == '=') op = BAM_CEQUAL;
        else if (op == 'X') op = BAM_CDIFF;
        else ajWarn("invalid CIGAR operation: %c",op);

        s = t + 1;
        ((ajuint*)dpos)[i] = x << BAM_CIGAR_SHIFT | op;
    }

    if (*s && c->n_cigar)
        ajWarn("unmatched CIGAR operation: %c", *s);

    c->bin = ajSeqBamReg2bin(c->pos, ajSeqBamCalend(c, MAJSEQBAMCIGAR(bam)));
    dpos += c->n_cigar*4;

    /* copy sequence string to bam->data, two bases per byte */
    s = ajStrGetPtr(r->Seq);
    slen = (ilen+1)/2;

    for (i = 0; i < slen; ++i)
        dpos[i] = 0;

    /* even positions fill the high nibble, odd positions the low one */
    for (i = 0; i < ilen; ++i)
        dpos[i/2] |= bam_nt16_table[(unsigned char)s[i]] << 4*(1-i%2);

    dpos += slen;

    /* copy quality values to bam->data */
    if(r->SeqQ && !ajStrMatchC(r->SeqQ, "*"))
    {
        s = ajStrGetPtr(r->SeqQ);

        for(i=0;i<ilen;i++)
            dpos[i]= s[i]-33;
    }
    else
        for(i=0;i<ilen;i++)
            dpos[i]= 0xff;

    l = ajListIterNewread(r->Tags);
    bam->l_aux=0;

    while (!ajListIterDone(l))
    {
        tag = ajListIterGet(l);

        /* TODO: array type 'B' and other types */

        if(tag->type == 'i' || tag->type == 'I')
        {
            tagvalsize = 4;
            ajStrToInt(tag->Comment, &intval);
            tagval = (unsigned char*)&intval;
        }
        else if(tag->type =='s' || tag->type =='S')
        {
            tagvalsize = 2;
            ajStrToInt(tag->Comment, &intval);
            tagval = (unsigned char*)&intval;
        }
        else if(tag->type =='c' || tag->type =='C')
        {
            tagvalsize = 1;
            ajStrToInt(tag->Comment, &intval);
            tagval = (unsigned char*)&intval;
        }
        else if(tag->type =='A')
        {
            tagvalsize = 1;
            tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
        }
        else if(tag->type =='Z')
        {
            /* string value: include the terminating NUL */
            tagvalsize = ajStrGetLen(tag->Comment)+1;
            tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
        }
        else
        {
            ajWarn("tag type '%c' not yet supported",tag->type);
            continue;
        }

        ajSeqBamAuxAppend(bam, ajStrGetPtr(tag->Name),
                          tag->type, tagvalsize, tagval);
    }

    ajListIterDel(&l);

    ajSeqBamWrite(gzfile, bam);

    return ajTrue;
}
/* @func ajResourceDel ********************************************************
**
** Destructor for a resource object. Releases every string, string list and
** object list held by the resource, then the resource itself, and sets the
** caller's pointer to NULL.
**
** Does nothing if the pointer, or the object it points to, is NULL.
**
** @param [d] Presource [AjPResource*] Address of the resource to delete
** @return [void]
******************************************************************************/

void ajResourceDel(AjPResource *Presource)
{
    AjPResource res;
    AjPResterm term;
    AjPReslink xref;
    AjPResquery query;
    AjIList it;
    AjPList *termlists[5];
    int i;

    if(!Presource || !(*Presource))
        return;

    res = *Presource;

    ajStrDel(&res->Id);
    ajListstrFreeData(&res->Idalt);
    ajStrDel(&res->Acc);
    ajStrDel(&res->Name);
    ajStrDel(&res->Desc);
    ajStrDel(&res->Url);
    ajStrDel(&res->Urllink);
    ajStrDel(&res->Urlrest);
    ajStrDel(&res->Urlsoap);
    ajListstrFreeData(&res->Cat);

    /* the five term lists are released identically, in this order */
    termlists[0] = &res->Taxon;
    termlists[1] = &res->Edamdat;
    termlists[2] = &res->Edamfmt;
    termlists[3] = &res->Edamid;
    termlists[4] = &res->Edamtpc;

    for(i = 0; i < 5; i++)
    {
        for(it = ajListIterNewread(*termlists[i]); !ajListIterDone(it); )
        {
            term = ajListIterGet(it);
            ajRestermDel(&term);
        }

        ajListIterDel(&it);
        ajListFree(termlists[i]);
    }

    /* cross-reference links */
    for(it = ajListIterNewread(res->Xref); !ajListIterDone(it); )
    {
        xref = ajListIterGet(it);
        ajReslinkDel(&xref);
    }

    ajListIterDel(&it);
    ajListFree(&res->Xref);

    /* query definitions */
    for(it = ajListIterNewread(res->Query); !ajListIterDone(it); )
    {
        query = ajListIterGet(it);
        ajResqueryDel(&query);
    }

    ajListIterDel(&it);
    ajListFree(&res->Query);

    ajListstrFreeData(&res->Example);

    ajStrDel(&res->Db);
    ajStrDel(&res->Setdb);
    ajStrDel(&res->Full);
    ajStrDel(&res->Qry);
    ajStrDel(&res->Formatstr);
    ajStrDel(&res->Filename);
    ajStrDel(&res->TextPtr);

    AJFREE(*Presource);
    *Presource = NULL;

    return;
}
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: 
%u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all 
matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }
/* @func ajResourceGetDbdata **************************************************
**
** Fills in the access method, query type, format string and database URL
** of a query from the query definitions of a resource.
**
** For queries whose data type is AJDATATYPE_URL the method is "urlonly"
** and a definition is accepted when its format term is "2331". For all
** other data types the method is "url" and a definition is accepted when
** the caller's findformat function recognises its format name or its
** (non-empty) format term. Every definition is tested, so when several
** are accepted the values of the last one remain in the query.
**
** @param [r] resource [const AjPResource] Resource object
** @param [u] qry [AjPQuery] Query to update
** @param [f] findformat [AjBool function] Format lookup function
** @return [AjBool] True if at least one query definition was accepted;
**                  false if none was, or if resource is NULL
******************************************************************************/

AjBool ajResourceGetDbdata(const AjPResource resource, AjPQuery qry,
                           AjBool findformat(const AjPStr format,
                                             ajint *iformat))
{
    AjIList qiter;
    AjPResquery rq = NULL;
    ajint fmtindex;
    AjBool urlonly;
    AjBool matched = ajFalse;

    if(!resource)
        return ajFalse;

    qry->InDrcat = ajTrue;
    ajStrAssignC(&qry->DbType, ajNamQueryGetDatatypeC(qry));

    ajDebug("ajResourceGetDbdata dbtype %S %d\n",
            qry->DbType, qry->DataType);

    urlonly = (qry->DataType == AJDATATYPE_URL);

    ajStrAssignC(&qry->Method, urlonly ? "urlonly" : "url");
    qry->QueryType = AJQUERY_ENTRY;
    qry->HasAcc = ajFalse;

    qiter = ajListIterNewread(resource->Query);

    while(!ajListIterDone(qiter))
    {
        rq = ajListIterGet(qiter);

        ajDebug("ajResourceGetDbdata test fmt: '%S' edam: '%S'\n",
                rq->Format, rq->FormatTerm);

        if(urlonly)
        {
            if(!ajStrMatchC(rq->FormatTerm, "2331"))
                continue;

            ajDebug(" OK fmt: '%S' edam: '%S' url '%S'\n",
                    rq->Format, rq->FormatTerm, rq->Url);
            ajStrAssignS(&qry->Formatstr, rq->FormatTerm);
            ajStrAssignS(&qry->DbUrl, rq->Url);
            matched = ajTrue;

            continue;
        }

        /* first try the format name ... */
        if(findformat(rq->Format, &fmtindex))
        {
            ajDebug(" OK fmt: '%S' url '%S'\n",
                    rq->Format, rq->Url);
            ajStrAssignS(&qry->Formatstr, rq->Format);
            ajStrAssignS(&qry->DbUrl, rq->Url);
            matched = ajTrue;
        }

        /* ... then the format term, which overrides it when recognised */
        if(ajStrGetLen(rq->FormatTerm) &&
           findformat(rq->FormatTerm, &fmtindex))
        {
            ajDebug(" OK edam: '%S' url '%S'\n",
                    rq->FormatTerm, rq->Url);
            ajStrAssignS(&qry->Formatstr, rq->FormatTerm);
            ajStrAssignS(&qry->DbUrl, rq->Url);
            matched = ajTrue;
        }
    }

    ajListIterDel(&qiter);

    return matched;
}
/* @prog pdbplus **************************************************************
**
** For every clean coordinate file (CCF) in the input list, runs STRIDE
** and/or NACCESS on the corresponding PDB file, copies the per-residue
** results into the residues of the parsed structure and writes the
** structure back out in clean format.
**
** The first character of the 'mode' value selects the programs: STRIDE is
** run unless it is '2', NACCESS is run unless it is '1'.
**
** Failures are reported with ajWarn and in the log file; STRIDE and
** NACCESS failure counts are printed at the end.
**
** @param [r] argc [ajint] Number of command-line arguments
** @param [r] argv [char**] Command-line arguments
** @return [int] Always 0
******************************************************************************/

int main(ajint argc, char **argv)
{
    AjPList     ccfin       = NULL; /* List of CCF (input) files.            */
    AjPDir      pdbin       = NULL; /* Path of pdb input files.              */
    AjPStr      pdbprefix   = NULL; /* Prefix of pdb input files.            */
    AjPStr      pdb_name    = NULL; /* Full name (path/name/extension) of
                                       pdb format input file.                */
    AjPDirout   ccfout      = NULL; /* Path of coordinate output file.       */
    AjPStr      randomname  = NULL; /* Name for temp file tempf.             */
    AjPStr      ccf_this    = NULL; /* Name of the current CCF input file.   */
    AjPStr      exec        = NULL; /* Command used to remove temp files.    */
    AjPStr      naccess_str = NULL; /* Name of a NACCESS output file.        */
    AjPStr      line        = NULL; /* Current line of program output.       */
    AjPStr      syscmd      = NULL; /* Command line arguments.               */
    AjPStr     *mode        = NULL; /* Mode of operation from acd.           */

    AjPFile     errf        = NULL; /* pdbplus error file pointer.           */
    AjPFile     serrf       = NULL; /* stride error file pointer.            */
    AjPFile     nerrf       = NULL; /* naccess error file pointer.           */
    AjPFile     tempf       = NULL; /* Temp file holding STRIDE or NACCESS
                                       output.                               */
    AjPFile     ccf_inf     = NULL; /* Protein coordinate input file.        */
    AjPFile     ccf_outf    = NULL; /* Protein coordinate output file.       */

    AjIList     iter        = NULL;

    AjBool      done_naccess= ajFalse;
    AjBool      done_stride = ajFalse;
    AjBool      found       = ajFalse;
    AjPResidue  temp_res    = NULL; /* Pointer to Residue object.            */
    AjPPdb      pdb_old     = NULL; /* Pointer to PDB object - without new
                                       stride elements.                      */
    AjPPdb      pdb         = NULL; /* Pointer to PDB object.                */
    ajint       idn         = 0;    /* Chain identifier as a number
                                       (1,2,...)                             */
    ajint       chain_num   = 0;    /* Chain identifier index (0,1,...).     */
    ajint       tS          = 0;    /* User-defined threshold size for SSEs. */
    ajint       nostride    = 0;    /* No. times stride failed               */
    ajint       nonaccess   = 0;    /* No. times naccess failed              */
    ajint       nofile      = 0;    /* No. times of file error               */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr      res         = NULL; /* Residue id from STRIDE ASG line
                                       (ALA etc).                            */
    AjPStr      res_num     = NULL; /* PDB residue number from STRIDE ASG
                                       line.                                 */
    char        pcid        = ' ';  /* Protein chain identifier from STRIDE
                                       or NACESS output (A,B, etc).          */
    char        ss          = ' ';  /* One-letter secondary structure code
                                       from STRIDE ASG line.                 */
    float       ph          = 0.0;  /* Phi angle from STRIDE ASG line.       */
    float       ps          = 0.0;  /* Psi angle from STRIDE ASG line.       */
    float       sa          = 0.0;  /* Residue solvent accessible area from
                                       STRIDE ASG line.                      */

    /* The ten accessibility values of a NACCESS RES line, in file order. */
    float       f1          = 0;
    float       f2          = 0;
    float       f3          = 0;
    float       f4          = 0;
    float       f5          = 0;
    float       f6          = 0;
    float       f7          = 0;
    float       f8          = 0;
    float       f9          = 0;
    float       f10         = 0;

    /* Allocate strings; this section is used for variables that are
       allocated once only. */
    pdb_name    = ajStrNew();
    res         = ajStrNew();
    res_num     = ajStrNew();
    randomname  = ajStrNew();
    syscmd      = ajStrNew();
    line        = ajStrNew();
    naccess_str = ajStrNew();
    exec        = ajStrNew();

    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin     = ajAcdGetDirlist("ccfinpath");
    pdbin     = ajAcdGetDirectory("pdbindir");
    pdbprefix = ajAcdGetString("pdbprefix");
    ccfout    = ajAcdGetOutdir("ccfoutdir");
    mode      = ajAcdGetList("mode");
    errf      = ajAcdGetOutfile("logfile");

    if(ajStrGetCharFirst(*mode) != '2')
        serrf = ajAcdGetOutfile("slogfile");

    if(ajStrGetCharFirst(*mode) != '1')
        nerrf = ajAcdGetOutfile("nlogfile");

    tS = ajAcdGetInt("thresholdsize");

    ajRandomSeed();
    ajFilenameSetTempname(&randomname);

    /*
    ** Start of main application loop.
    ** Process each PDB/ protein coordinate file (EMBL format) in turn.
    */
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file. If it cannot be opened, write a
           message to the error file, delete ccf_this and continue. */
        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)
        {
            ajWarn("%s%S\n//\n",
                   "clean coordinate file not found: ", ccf_this);
            ajFmtPrintF(errf, "%s%S\n//\n",
                        "clean coordinate file not found: ", ccf_this);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFmtPrint("Processing %S\n", ccf_this);
        fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into
           AjPPdb object. ajPdbReadAllModelsNew will create the AjPPdb
           object. */
        if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
            ajWarn("ERROR Clean coordinate file read error: %S\n//\n",
                   ccf_this);
            ajFmtPrintF(errf,
                        "ERROR Clean coordinate file read error: %S\n//\n",
                        ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this);
            nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old);
        ajPdbDel(&pdb_old);

        /* Construct name of corresponding PDB file.
           NACCESS does *not* generate an output file if the path is './'
           e.g. naccess ./1rbp.ent , therefore replace './' with null. */
        ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));

        if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
            ajStrAssignC(&pdb_name, "");

        ajStrAppendS(&pdb_name, pdbprefix);
        ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
        ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));

        /* Check corresponding PDB file exists for reading. */
        if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ",
                        pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this);
            ajPdbDel(&pdb);
            nofile++;
            continue;
        }

        if(ajStrGetCharFirst(*mode) != '2')
        {
            /*
            ** Create a string containing the STRIDE command line (it needs
            ** PDB file name & name of temp output file).
            **
            ** NOTE(review): the command is run through the shell with the
            ** file names pasted in unquoted; names containing spaces or
            ** shell metacharacters will not be handled safely.
            */
            ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",
                        ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
            ajFmtPrint("%S %S -f%S >> %s 2>&1\n",
                       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
            system(ajStrGetPtr(syscmd));

            /* Open the stride output file */
            if (((tempf = ajFileNewInNameS(randomname)) == NULL))
            {
                ajWarn("%s%S\n//\n",
                       "no stride output for: ", pdb_name);
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride output for: ", pdb_name);
                nostride++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride output for: ", pdb_name);

            done_stride = ajFalse;

            /* Parse STRIDE output from temp output file a line at a time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"ASG"))
                {
                    ajFmtScanS(line, "%*S %S %c %S %*d %c %*S %f %f %f %*S",
                               &res, &pcid, &res_num, &ss, &ph, &ps, &sa);

                    /*
                    ** Populate pdbplus object with the data from this parsed
                    ** line. This means first identifying the chain, then
                    ** finding the residue.
                    */

                    /* Determine the chain number. ajDmxPdbplusChain does not
                       recognise '-', so change '-' to '.' */
                    if (pcid == '-')
                        pcid = '.';

                    /* Get chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id %c "
                                    "to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** The chain number that will get written starts at 1, but
                    ** we want an index into an array which must start at 0,
                    ** so subtract 1 from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb object,
                    ** found switches to true when first residue corresponding
                    ** to the line is found.
                    */
                    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            done_stride = ajTrue;
                            found = ajTrue;
                            temp_res->eStrideType = ss;
                            temp_res->Phi  = ph;
                            temp_res->Psi  = ps;
                            temp_res->Area = sa;
                        }
                        /* If the matching residue has been processed
                           move on to next ASG line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residue not found yet. */
                            continue;
                    }

                    ajListIterDel(&iter);
                } /* End of if ASG loop. */
            } /* End of while line loop. */

            if(done_stride)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "stride data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no stride data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
                nostride++;
            }

            /* Close STRIDE temp file. & tidy up. */
            ajFileClose(&tempf);

            /* Remove temporary file (stride output file). */
            ajFmtPrintS(&exec, "rm %S", randomname);
            ajSysSystem(exec);

            /*
            ** Calculate element serial numbers (eStrideNum) & amend residue
            ** objects, count no's of elements and amend chain object
            ** (numHelices, num Strands).
            */
            pdbplus_sort(pdb, tS);
        }

        if(ajStrGetCharFirst(*mode) != '1')
        {
            /*
            ** Create a string containing the NACCESS command line (it needs
            ** PDB file name & name of temp output file) & call NACCESS.
            ** If e.g. /data/structure/pdbfred.ent was parsed and the program
            ** was run from /stuff, then /stuff/fred.asa and /stuff/fred.rsa
            ** would be written. These must be deleted once parsed (only
            ** use the .rsa file here).
            **
            ** NOTE(review): same shell-quoting caveat as the STRIDE call.
            */
            ajFmtPrintS(&syscmd, "%S %S >> %s 2>&1",
                        ajAcdGetpathC("naccess"), pdb_name,
                        ajFileGetNameC(nerrf));
            ajFmtPrint("%S %S >> %s 2>&1\n",
                       ajAcdGetpathC("naccess"), pdb_name,
                       ajFileGetNameC(nerrf));
            system(ajStrGetPtr(syscmd));

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".rsa");

            /* Open the NACCESS output file. */
            if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess output for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
                nonaccess++;
                ajStrDel(&ccf_this);
                ajPdbDel(&pdb);
                continue;
            }
            else
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess output for: ", pdb_name);

            done_naccess = ajFalse;

            /* Parse NACCESS output from temp output file a line at a time. */
            while(ajReadlineTrim(tempf,&line))
            {
                if(ajStrPrefixC(line,"RES"))
                {
                    /* The chain identifier is read from column 9; a RES
                       line too short to have one cannot be parsed. */
                    if(ajStrGetLen(line) < 9)
                        continue;

                    /* Read data from lines. A blank chain identifier means
                       the line has one field fewer to skip. */
                    if((pcid = line->Ptr[8]) == ' ')
                        ajFmtScanS(line, "%*S %S %S %f %f %f "
                                   "%f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);
                    else
                        ajFmtScanS(line, "%*S %S %*c %S %f %f "
                                   "%f %f %f %f %f %f %f %f",
                                   &res, &res_num, &f1, &f2, &f3, &f4, &f5,
                                   &f6, &f7, &f8, &f9, &f10);

                    /* Identify the chain, then find all the residues
                       corresponding to the residue. */

                    /* Get the chain number from the chain identifier. */
                    if(!ajPdbChnidToNum(pcid, pdb, &idn))
                    {
                        ajWarn("Could not convert chain id %c to chain"
                               " number in pdb file %S\n//\n",
                               pcid, pdb_name);
                        ajFmtPrintF(errf, "Could not convert chain id"
                                    " %c to chain number in pdb file %S\n//\n",
                                    pcid, pdb_name);
                        continue;
                    }

                    /*
                    ** Chain number will start at 1, but we want an index
                    ** into an array which must start at 0, so subtract 1
                    ** from the chain number to get the index.
                    */
                    chain_num = idn-1;

                    /*
                    ** Iterate through the list of residues in the Pdb object,
                    ** temp_res is an AjPResidue used to point to the current
                    ** residue.
                    ** ajBool found switches to true when first residue
                    ** corresponding to the line is found.
                    */
                    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
                    found = ajFalse;

                    while((temp_res = (AjPResidue)ajListIterGet(iter)))
                    {
                        /* If we have found the residue we want, write the
                           residue object. */
                        if((ajStrMatchS(res_num, temp_res->Pdb) &&
                            ajStrMatchS(res, temp_res->Id3)))
                        {
                            found = ajTrue;
                            done_naccess = ajTrue;
                            temp_res->all_abs  = f1;
                            temp_res->all_rel  = f2;
                            temp_res->side_abs = f3;
                            temp_res->side_rel = f4;
                            temp_res->main_abs = f5;
                            temp_res->main_rel = f6;
                            temp_res->npol_abs = f7;
                            temp_res->npol_rel = f8;
                            temp_res->pol_abs  = f9;
                            temp_res->pol_rel  = f10;
                        }
                        /* If the matching residues have all been processed
                           move on to next RES line, next residue. */
                        else if(found == ajTrue)
                            break;
                        else
                            /* Matching residues not found yet,
                               move on to next residue. */
                            continue;
                    }

                    ajListIterDel(&iter);
                }
            }

            if(done_naccess)
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "naccess data for: ", pdb_name);
            else
            {
                ajFmtPrintF(errf, "%s%S\n//\n",
                            "no naccess data for: ", pdb_name);
                ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
                nonaccess++;
            }

            /* Remove temporary file (naccess output files). */
            ajFileClose(&tempf);

            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".asa");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);

            ajStrAssignS(&naccess_str, pdbprefix);
            ajStrAppendS(&naccess_str, pdb->Pdb);
            ajStrAppendC(&naccess_str, ".log");
            ajFmtPrintS(&exec, "rm %S", naccess_str);
            ajSysSystem(exec);
        }

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);

        /* Write AjPPdb object to the output file in clean format. A file
           that could not be opened is reported like a failed write. */
        if(!ccf_outf || !ajPdbWriteAll(ccf_outf, pdb))
        {
            ajWarn("%s%S\n//\n","Could not write results file for: ",
                   pdb->Pdb);
            ajFmtPrintF(errf,"%s%S\n//\n",
                        "Could not write results file for ", pdb->Pdb);
        }

        ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */

    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
                nostride, nonaccess);

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);

    ajFileClose(&errf);

    if(ajStrGetCharFirst(*mode) != '2')
        ajFileClose(&serrf);

    if(ajStrGetCharFirst(*mode) != '1')
        ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);

    ajExit();

    return 0;
}
/* @funcstatic primersearch_store_hits ****************************************
**
** Pairs every forward-primer match with every reverse-primer match and,
** for each pair that gives a positive amplimer length, appends a new hit
** record to the primer's hit list.
**
** The amplimer length is the sequence length minus the bases before the
** forward match start and minus the bases before the reverse match start.
**
** @param [r] primdata [const Primer] Primer pair; hits go to its hitlist
** @param [u] fhits [AjPList] Matches of the forward-strand pattern
** @param [u] rhits [AjPList] Matches of the reverse-strand pattern
** @param [r] seq [const AjPSeq] Sequence that was searched
** @param [r] reverse [AjBool] If true the forward and reverse pattern
**                             strings are swapped in the stored hit
** @return [void]
******************************************************************************/

static void primersearch_store_hits(const Primer primdata,
                                    AjPList fhits, AjPList rhits,
                                    const AjPSeq seq, AjBool reverse)
{
    AjIList fwditer;
    AjIList reviter;
    PHit hit = NULL;
    ajint amplen = 0;

    for(fwditer = ajListIterNewread(fhits); !ajListIterDone(fwditer); )
    {
        EmbPMatMatch fwd = NULL;
        EmbPMatMatch rev = NULL;

        amplen = 0;
        fwd = ajListIterGet(fwditer);

        /* a fresh iterator over the reverse hits for each forward hit */
        for(reviter = ajListIterNewread(rhits); !ajListIterDone(reviter); )
        {
            ajint seqlen = ajSeqGetLen(seq);
            ajint fstart = (fwd->start);
            ajint rskip;
            const AjPStr fpat = NULL;
            const AjPStr rpat = NULL;

            rev = ajListIterGet(reviter);
            rskip = (rev->start-1);

            amplen = seqlen-(fstart-1)-rskip;

            /* no point making a hit if -ve length! */
            if(amplen <= 0)
                continue;

            if(reverse)
            {
                fpat = primdata->reverse->patstr;
                rpat = primdata->forward->patstr;
            }
            else
            {
                fpat = primdata->forward->patstr;
                rpat = primdata->reverse->patstr;
            }

            hit = NULL;
            AJNEW(hit);

            /* string members must be NULL before the first assignment */
            hit->seqname = NULL;
            hit->desc    = NULL;
            hit->acc     = NULL;
            hit->forward = NULL;
            hit->reverse = NULL;

            ajStrAssignC(&hit->seqname, ajSeqGetNameC(seq));
            ajStrAssignS(&hit->desc, ajSeqGetDescS(seq));
            ajStrAssignS(&hit->acc, ajSeqGetAccS(seq));

            hit->forward_pos      = fwd->start;
            hit->reverse_pos      = rev->start;
            hit->forward_mismatch = fwd->mm;
            hit->reverse_mismatch = rev->mm;
            hit->amplen           = amplen;

            ajStrAssignS(&hit->forward, fpat);
            ajStrAssignS(&hit->reverse, rpat);

            ajListPushAppend(primdata->hitlist, hit);
        }

        ajListIterDel(&reviter);
    }

    ajListIterDel(&fwditer);

    return;
}
/* @funcstatic assemoutWriteSamAlignment **************************************
**
** Writes one read as a SAM alignment line: the eleven tab-separated
** mandatory fields followed by the read's tags.
**
** If the read has no CIGAR string but its contig has a consensus, a CIGAR
** is built by comparing the aligned part of the read with the consensus,
** soft-clip ('S') operations are added for the unaligned ends, and the
** sequence and qualities are written with the '*' padding characters
** removed. If neither CIGAR nor consensus is available an error is
** reported and '*' is written as the CIGAR.
**
** Calls ajDie if the read's reference number is not below ncontigs.
**
** @param [u] outf [AjPFile] Output file
** @param [r] r [const AjPAssemRead] Read to write
** @param [r] contigs [AjPAssemContig const *] Contigs, indexed by the
**                                             read's reference number
** @param [r] ncontigs [ajint] Number of contigs
** @return [AjBool] False if neither a CIGAR string nor a consensus
**                  sequence was available, true otherwise
******************************************************************************/

static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
                                        AjPAssemContig const * contigs,
                                        ajint ncontigs)
{
    const AjPAssemContig refcontig = NULL;
    AjPAssemTag tag     = NULL;
    AjIList tagiter     = NULL;
    AjPStr consensus    = NULL;
    AjPStr seq          = NULL;
    AjPStr qual         = NULL;
    AjPStr revqual      = NULL;  /* owned copy of the qualities (reverse) */
    AjPStr scratch      = NULL;  /* aligned subsequence, then CIGAR copy  */
    AjPStr cigar        = NULL;
    AjPStr seqnopad     = NULL;
    AjPStr qualnopad    = NULL;
    const char* rnext   = NULL;
    const char* refseq  = NULL;
    AjBool isrev        = ajFalse;
    AjBool ok           = ajTrue;
    ajint pos           = 0;
    ajuint i            = 0;
    int tagtype;
    char base;

    if(r->Reference >= ncontigs)
        ajDie("assemoutWriteSamAlignment: reference sequence number"
              " '%d' is larger than or equal to known number of reference"
              " sequences '%d'. Problem while processing read '%S'.",
              r->Reference, ncontigs, r->Name);

    /* reference -1 means the read has no reference contig */
    if(r->Reference != -1)
        refcontig = contigs[r->Reference];

    ajStrAssignRef(&seq, r->Seq);

    if(refcontig != NULL)
        consensus = refcontig->Consensus;

    /* reference of the next segment: none, same as this one, or by name */
    if(r->Rnext == -1)
        rnext = "*";
    else if(r->Rnext == r->Reference)
        rnext = "=";
    else
        rnext = ajStrGetPtr(contigs[r->Rnext]->Name);

    if(r->Flag & BAM_FREVERSE)
    {
        isrev = ajTrue;
        revqual = ajStrNewS(r->SeqQ);

        if(!r->Reversed)
        {
            ajStrReverse(&revqual);
            ajSeqstrReverse(&seq);
        }

        qual = revqual;
        pos  = r->y1;
        ajStrAssignSubS(&scratch, seq,
                        ajStrGetLen(r->Seq) - r->y2,
                        ajStrGetLen(r->Seq) - r->x2
                        );
    }
    else
    {
        isrev = ajFalse;
        pos  = r->x1;
        qual = r->SeqQ;
        ajStrAssignSubS(&scratch, seq,
                        r->x2-1,
                        r->y2-1
                        );
    }

    if(r->Cigar)
    {
        if(ajStrGetLen(r->Cigar))
        {
            /* CIGAR supplied: only empty SEQ/QUAL need a placeholder */
            if(!ajStrGetLen(seq))
                ajStrAssignK(&seq, '*');

            if(!ajStrGetLen(qual))
                ajStrAssignK(&qual, '*');
        }
        else
            ajStrAssignK(&cigar, '*');
    }
    else if(consensus)
    {
        /* derive the CIGAR from the consensus */
        refseq = ajStrGetPtr(consensus) + (isrev ? r->y1-1 : r->x1-1);
        cigar  = assemoutMakeCigar(refseq, ajStrGetPtr(scratch));

        seqnopad  = ajStrNewRes(ajStrGetLen(seq));
        qualnopad = ajStrNewRes(ajStrGetLen(seq));

        /* drop padding characters and their quality values */
        for(i=0; i < ajStrGetLen(seq); i++)
        {
            base = ajStrGetCharPos(seq, i);

            if(base != '*')
            {
                ajStrAppendK(&seqnopad, base);
                ajStrAppendK(&qualnopad, ajStrGetCharPos(qual, i));
            }
        }

        ajDebug("cigar: %S\n", cigar);

        ajStrAssignS(&scratch, cigar);

        /* soft-clip the unaligned ends: leading clip first, then trailing */
        if(isrev)
        {
            if(r->y2 < (ajint)ajStrGetLen(seq))
                ajFmtPrintS(&cigar, "%dS%S",
                            ajStrGetLen(seq) - r->y2, scratch);

            if(r->x2 > 1)
                ajFmtPrintAppS(&cigar, "%dS", r->x2 - 1);
        }
        else
        {
            if(r->x2 > 1)
                ajFmtPrintS(&cigar, "%dS%S", r->x2 - 1, scratch);

            if(r->y2 < (ajint)ajStrGetLen(seq))
                ajFmtPrintAppS(&cigar, "%dS", ajStrGetLen(seq) - r->y2);
        }
    }
    else
    {
        ajErr("both CIGAR string and consensus sequence not available");
        ok = ajFalse;
        ajStrAssignK(&cigar, '*');
    }

    ajStrDel(&scratch);

    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
                r->Name,
                r->Flag,
                (refcontig==NULL ? "*" : ajStrGetPtr(refcontig->Name)),
                pos,
                r->MapQ,
                (cigar ? cigar : r->Cigar),
                rnext,
                r->Pnext,
                r->Tlen,
                (r->Cigar ? seq  : seqnopad),
                (r->Cigar ? qual : qualnopad));

    tagiter = ajListIterNewread(r->Tags);

    while(!ajListIterDone(tagiter))
    {
        tag = ajListIterGet(tagiter);

        /* TODO: array type, 'B' */

        /* In SAM, all single integer types are mapped to int32_t [SAM spec] */
        switch(tag->type)
        {
            case 'c':
            case 'C':
            case 's':
            case 'S':
            case 'I':
                tagtype = 'i';
                break;
            default:
                tagtype = tag->type;
                break;
        }

        ajFmtPrintF(outf, "\t%S:%c:", tag->Name, tagtype);

        if(tag->x1 || tag->y1)
            ajFmtPrintF(outf, " %u %u", tag->x1, tag->y1);

        if(tag->Comment && ajStrGetLen(tag->Comment)>0)
            ajFmtPrintF(outf, "%S", tag->Comment);
    }

    ajListIterDel(&tagiter);

    ajFmtPrintF(outf, "\n");

    if(revqual)
        ajStrDel(&revqual);

    ajStrDel(&seq);
    ajStrDel(&cigar);
    ajStrDel(&seqnopad);
    ajStrDel(&qualnopad);

    return ok;
}
static void remap_RestrictPreferred(const AjPList l, const AjPTable t) { AjIList iter = NULL; EmbPMatMatch m = NULL; const AjPStr value = NULL; AjPStr newiso = NULL; AjBool found; /* name found in isoschizomer list */ /* for parsing value->iso string */ AjPStrTok tok = NULL; char tokens[] = " ,"; AjPStr code = NULL; iter = ajListIterNewread(l); while((m = (EmbPMatMatch)ajListIterGet(iter))) { found = ajFalse; /* get prototype name */ value = ajTableFetchS(t, m->cod); if(value) { ajStrAssignC(&newiso, ""); /* parse isoschizomer names from m->iso */ ajStrTokenDel(&tok); tok = ajStrTokenNewC(m->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); /* found the prototype name? */ if(!ajStrCmpCaseS(code, value)) { ajStrAppendS(&newiso, m->cod); found = ajTrue; } else ajStrAppendS(&newiso, code); } ajStrTokenDel(&tok); /* if the name was not replaced, then add it in now */ if(!found) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); ajStrAppendS(&newiso, m->cod); } ajDebug("RE: %S -> %S iso=%S newiso=%S\n", m->cod, value, m->iso, newiso); /* replace the old iso string with the new one */ ajStrAssignS(&m->iso, newiso); /* rename the enzyme to the prototype name */ ajStrAssignS(&m->cod, value); } } ajListIterDel(&iter); ajStrDel(&newiso); ajStrDel(&code); ajStrTokenDel(&tok); return; }
/* @funcstatic dottup_stretchplot *********************************************
**
** Configures the graph axes to the given ranges, adds one line per word
** match to the graph, displays it and closes the graphics.
**
** Each match is drawn from (seq1start+begin1, seq2start+begin2) to the
** point length-1 further along both axes.
**
** @param [u] graph [AjPGraph] Graph to draw on
** @param [r] matchlist [const AjPList] List of EmbPWordMatch; may be NULL
** @param [r] seq1 [const AjPSeq] Sequence named on the x axis
** @param [r] seq2 [const AjPSeq] Sequence named on the y axis
** @param [r] begin1 [ajint] Start of the x range
** @param [r] begin2 [ajint] Start of the y range
** @param [r] end1 [ajint] End of the x range
** @param [r] end2 [ajint] End of the y range
** @return [void]
******************************************************************************/

static void dottup_stretchplot(AjPGraph graph, const AjPList matchlist,
                               const AjPSeq seq1, const AjPSeq seq2,
                               ajint begin1, ajint begin2,
                               ajint end1, ajint end2)
{
    EmbPWordMatch match   = NULL;
    AjPGraphdata plotdata = NULL;
    AjPStr title          = NULL;
    AjIList matchiter     = NULL;
    float xorigin[1];
    float yorigin[2];
    float xstart;
    float ystart;
    float xend;
    float yend;

    /* take a copy of the title before it is set again below */
    title = ajStrNew();
    ajFmtPrintS(&title,"%S",ajGraphGetTitleS(graph));

    /* a single data point at the origin of the plotted region */
    plotdata = ajGraphdataNewI(1);
    xorigin[0] = (float)begin1;
    yorigin[0] = (float)begin2;

    ajGraphSetTitleC(graph,ajStrGetPtr(title));
    ajGraphSetXlabelC(graph,ajSeqGetNameC(seq1));
    ajGraphSetYlabelC(graph,ajSeqGetNameC(seq2));

    ajGraphdataSetTypeC(plotdata,"2D Plot Float");
    ajGraphdataSetMinmax(plotdata,
                         (float)begin1,(float)end1,
                         (float)begin2,(float)end2);
    ajGraphdataSetTruescale(plotdata,
                            (float)begin1,(float)end1,
                            (float)begin2,(float)end2);

    ajGraphxySetXstartF(graph,(float)begin1);
    ajGraphxySetXendF(graph,(float)end1);
    ajGraphxySetYstartF(graph,(float)begin2);
    ajGraphxySetYendF(graph,(float)end2);

    ajGraphxySetXrangeII(graph,begin1,end1);
    ajGraphxySetYrangeII(graph,begin2,end2);

    if(matchlist)
    {
        matchiter = ajListIterNewread(matchlist);

        while((match = ajListIterGet(matchiter)))
        {
            xstart = (float) (match->seq1start + begin1);
            ystart = (float) (match->seq2start + begin2);

            xend = xstart;
            yend = ystart;
            xend += (float) match->length-1;
            yend += (float) match->length-1;

            ajGraphAddLine(graph,xstart,ystart,xend,yend,0);
        }

        ajListIterDel(&matchiter);
    }

    ajGraphdataAddXY(plotdata,xorigin,yorigin);
    ajGraphDataReplace(graph,plotdata);

    ajGraphxyDisplay(graph,ajFalse);
    ajGraphicsClose();

    ajStrDel(&title);

    return;
}
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. 
** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); 
rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. 
*/ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the 
count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
int main(int argc, char **argv) { /* Variable declarations */ AjPStr dbname = NULL; AjPStr svrname = NULL; AjPFile outfile = NULL; AjPResource resource = NULL; AjPResourcein resourcein = NULL; AjPStr resourceqry = NULL; AjPStr type = NULL; AjBool id; AjBool qry; AjBool all; AjBool verbose; AjPStr methods = NULL; AjPStr release = NULL; AjPStr comment = NULL; AjPStr defined = NULL; AjPList list = NULL; AjPTagval tagval = NULL; AjIList iter = NULL; ajuint space = 0; AjPList aliaslist = NULL; AjIList aliter = NULL; const AjPStr alias = NULL; ajuint maxlen; AjPStr truedbname = NULL; AjBool isalias = ajFalse; /* ACD processing */ embInit("dbtell", argc, argv); dbname = ajAcdGetString("database"); svrname = ajAcdGetString("server"); verbose = ajAcdGetBoolean("full"); outfile = ajAcdGetOutfile("outfile"); ajStrAssignS(&truedbname, dbname); ajNamAliasDatabase(&truedbname); ajStrFmtLower(&truedbname); if(!ajStrMatchS(dbname, truedbname)) isalias = ajTrue; /* Application logic */ /* Check EMBOSS database information. 
Write output file */ if(ajNamDbDetailsSvr(truedbname, svrname, &type, &id, &qry, &all, &comment, &release, &methods, &defined)) { if(isalias) ajFmtPrintF(outfile, "# %S is an alias for %S defined in %S\n", dbname, truedbname, defined); else ajFmtPrintF(outfile, "# %S is defined in %S\n", truedbname, defined); ajFmtPrintF(outfile, "# access levels id: %B query: %B all: %B\n\n", id, qry, all); ajFmtPrintF(outfile, "DBNAME %S [\n", truedbname); if(ajStrGetLen(svrname)) list = ajNamDbGetAttrlistSvr(truedbname, svrname); else list = ajNamDbGetAttrlist(truedbname); iter = ajListIterNewread(list); while(!ajListIterDone(iter)) { tagval = ajListIterGet(iter); space = 15 - ajStrGetLen(ajTagvalGetTag(tagval)); ajFmtPrintF(outfile, " %S:%.*s\"%S\"\n", ajTagvalGetTag(tagval), space, " ", ajTagvalGetValue(tagval)); ajTagvalDel(&tagval); } ajListIterDel(&iter); ajListFree(&list); ajFmtPrintF(outfile, "]\n"); if(verbose) { aliaslist = ajListNew(); ajNamListFindAliases(truedbname, aliaslist); if(ajListGetLength(aliaslist)) { ajFmtPrintF(outfile, "\n"); aliter = ajListIterNewread(aliaslist); maxlen = 1; while(!ajListIterDone(aliter)) { alias = ajListIterGet(aliter); if(MAJSTRGETLEN(alias) > maxlen) maxlen = MAJSTRGETLEN(alias); } ajListIterDel(&aliter); aliter = ajListIterNewread(aliaslist); while(!ajListIterDone(aliter)) { alias = ajListIterGet(aliter); if(ajStrFindK(alias, ':') < 0) ajFmtPrintF(outfile, "ALIAS %-*S %S\n", maxlen, alias, truedbname); } ajListIterDel(&aliter); } ajListstrFree(&aliaslist); } ajStrDel(&type); ajStrDel(&methods); ajStrDel(&release); ajStrDel(&comment); ajStrDel(&defined); } else { /* try looking in DRCAT */ resourcein = ajResourceinNew(); resource = ajResourceNew(); ajFmtPrintS(&resourceqry, "drcat:%S", dbname); ajResourceinQryS(resourcein, resourceqry); if(ajResourceinRead(resourcein, resource)) { ajFmtPrintF(outfile, "DBNAME %S [\n", dbname); ajFmtPrintF(outfile, " comment: \"defined in DRCAT\"\n"); ajFmtPrintF(outfile, "]\n"); } 
ajResourceinDel(&resourcein); ajResourceDel(&resource); ajStrDel(&resourceqry); } /* Memory clean-up and exit */ ajFileClose(&outfile); ajStrDel(&truedbname); ajStrDel(&dbname); ajStrDel(&svrname); embExit(); return 0; }