static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. 
** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); 
rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. 
*/ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the 
count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
/* @funcstatic silent_restr_read **********************************************
**
** Reads restriction enzyme entries from the data file
** "REBASE/embossre.enz" and pushes one PRinfo record per selected enzyme
** onto a newly created list. Aborts via ajFatal() if the data file cannot
** be opened.
**
** @param [w] relist [AjPList*] Receives the new list; each node is a PRinfo
**                   holding copies of the enzyme code and pattern plus the
**                   cut count and the four cut positions. Records are pushed
**                   with ajListPush().
** @param [r] enzymes [const AjPStr] Comma-separated enzyme names, or "all".
**                    NULL, or a list that parses to no entries, selects
**                    every enzyme in the file
** @return [ajint] Number of records pushed onto the list
******************************************************************************/

static ajint silent_restr_read(AjPList *relist,const AjPStr enzymes)
{
    EmbPPatRestrict rptr = NULL;
    AjPFile fin = NULL;
    AjPStr refilename = NULL;
    ajint RStotal = 0;
    PRinfo rinfo = NULL;
    AjBool isall = ajTrue;
    ajint ne = 0;
    ajint i;
    AjPStr *ea = NULL;

    refilename = ajStrNewC("REBASE/embossre.enz");
    rptr = embPatRestrictNew();
    *relist = ajListNew();

    fin = ajDatafileNewInNameS(refilename);

    if(!fin)
        ajFatal("Aborting...restriction file '%S' not found", refilename);

    /* Parse the user-selected enzyme list */
    if(enzymes)
        ne = ajArrCommaList(enzymes,&ea);

    /*
    ** ea[0] may only be inspected when at least one name was parsed;
    ** otherwise ea is still NULL and every enzyme is selected.
    */
    if(ne > 0)
    {
        for(i=0;i<ne;++i)
            ajStrRemoveWhite(&ea[i]);

        if(!ajStrMatchCaseC(ea[0],"all"))
            isall = ajFalse;
    }

    while(!ajFileIsEof(fin))
    {
        if(!embPatRestrictReadEntry(rptr,fin))
            continue;

        if(!isall)
        {
            /* skip entries whose code is not in the explicit list */
            for(i=0;i<ne;++i)
                if(ajStrMatchCaseS(ea[i],rptr->cod))
                    break;

            if(i==ne)
                continue;
        }

        AJNEW(rinfo);

        /* reading in RE info into rinfo from EmbPPatRestrict structure */
        rinfo->code  = ajStrNewC(ajStrGetPtr(rptr->cod));
        rinfo->site  = ajStrNewC(ajStrGetPtr(rptr->pat));
        rinfo->ncuts = rptr->ncuts;
        rinfo->cut1  = rptr->cut1;
        rinfo->cut2  = rptr->cut2;
        rinfo->cut3  = rptr->cut3;
        rinfo->cut4  = rptr->cut4;

        ajListPush(*relist,(void *)rinfo);
        RStotal++;
    }

    for(i=0;i<ne;++i)
        ajStrDel(&ea[i]);

    AJFREE(ea);

    embPatRestrictDel(&rptr);
    ajFileClose(&fin);
    ajStrDel(&refilename);

    return RStotal;
}