AjBool ensListUintSortDescendingUnique(AjPList list) { if (!list) return ajFalse; ajListSortUnique(list, &listUintCompareDescending, &listUintDelete); return ajTrue; }
static void jaspscan_ParseInput(const AjPStr dir, const AjPStr jaspdir, const AjPStr mats, const AjPStr excl, ajuint *recurs, AjPList ret) { ajuint nm = 0; ajuint ne = 0; AjPStr *carr = NULL; AjPStr *earr = NULL; AjPFile inf = NULL; AjPStr line = NULL; AjPStr comm = NULL; AjPStr val = NULL; ajuint i; ajuint j; char c; ajuint rlen = 0; if(*recurs > JASPSCAN_RECURS) ajFatal("Too many recursion levels in matrix list files"); line = ajStrNew(); comm = ajStrNew(); if(mats) { nm = ajArrCommaList(mats,&carr); for(i=0; i < nm; ++i) { if(ajStrGetCharFirst(carr[i]) != '@') ajStrFmtUpper(&carr[i]); if(ajStrMatchC(carr[i],"ALL")) { jaspscan_GetFileList(dir, jaspdir, "*", ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } else if(ajStrGetCharFirst(carr[i]) == '@') { ajStrTrimStartC(&carr[i],"@"); inf = ajFileNewInNameS(carr[i]); if(!inf) ajFatal("Cannot open list file %S",carr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; jaspscan_ParseInput(dir,jaspdir,comm,NULL,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { jaspscan_GetFileList(dir,jaspdir,ajStrGetPtr(carr[i]),ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } } for(i=0; i < nm; ++i) ajStrDel(&carr[i]); AJFREE(carr); } if(excl) { ne = ajArrCommaList(excl,&earr); for(i=0; i < ne; ++i) { if(ajStrGetCharFirst(earr[i]) != '@') ajStrFmtUpper(&earr[i]); if(ajStrGetCharFirst(earr[i]) == '@') { ajStrTrimStartC(&earr[i],"@"); inf = ajFileNewInNameS(earr[i]); if(!inf) ajFatal("Cannot open list file %S",earr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; jaspscan_ParseInput(dir,jaspdir,NULL,comm,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { ajStrAssignS(&line,earr[i]); ajStrAppendC(&line,J_EXT); rlen = ajListGetLength(ret); for(j=0; j < rlen; ++j) { ajListPop(ret,(void **)&val); if(ajStrSuffixS(val,line)) ajStrDel(&val); else ajListPushAppend(ret,(void *)val); } } } for(i=0; i < ne; ++i) ajStrDel(&earr[i]); AJFREE(earr); } ajStrDel(&line); ajStrDel(&comm); return; }
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. ** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. */ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
int main(int argc, char** argv) { AjPFile outf = NULL; AjPFile cachef = NULL; AjIList iterator = NULL; AjPList aliases = NULL; AjPList dbas = NULL; AjPList species = NULL; AjPStr alias = NULL; AjPStr dbname = NULL; AjPStr spname = NULL; AjPStr svrname = NULL; AjPStr svrurl = NULL; AjPStr dbcurl = NULL; AjPTime svrtime = NULL; EnsEDatabaseadaptorGroup dbag = ensEDatabaseadaptorGroupNULL; EnsPDatabaseadaptor dba = NULL; EnsPDatabaseconnection dbc = NULL; embInit("cacheensembl", argc, argv); ensInit(); svrname = ajAcdGetString("servername"); outf = ajAcdGetOutfile("outfile"); cachef = ajAcdGetOutfile("cachefile"); dbcurl = ajStrNew(); svrurl = ajStrNew(); dbname = ajStrNew(); ajNamSvrGetUrl(svrname, &svrurl); if(!svrurl) ajFatal("Could not resolve server name '%S'.", svrname); dbc = ensDatabaseconnectionNewUrl(svrurl); ensRegistryLoadDatabaseconnection(dbc); ensDatabaseconnectionDel(&dbc); /* Write the server file header. */ svrtime = ajTimeNewTodayFmt("cachefile"); ajFmtPrintF(cachef, "# %S %D\n", ajFileGetNameS(cachef), svrtime); ajTimeDel(&svrtime); ajFmtPrintF(cachef, "# Automatically generated by cacheensembl " "for server '%S'.\n\n", svrname); /* ** Get all Ensembl Database Adaptor objects and write them as ** EMBOSS Database definitions. */ aliases = ajListstrNew(); dbas = ajListNew(); species = ajListstrNew(); ensRegistryRetrieveAllSpecies(species); while(ajListstrPop(species, &spname)) { ensRegistryGetAllDatabaseadaptors(ensEDatabaseadaptorGroupNULL, spname, dbas); while(ajListPop(dbas, (void**) &dba)) { dbag = ensDatabaseadaptorGetGroup(dba); if(dbag == ensEDatabaseadaptorGroupNULL) { ajDebug("cacheensembl main got unexpected " "Ensembl Database Adaptor Group %d.\n", dbag); continue; } ajStrAssignS(&dbname, ensDatabaseadaptorGetSpecies(dba)); if(dbag != ensEDatabaseadaptorGroupCore) { ajStrAppendC(&dbname, "_"); ajStrAppendC(&dbname, ensDatabaseadaptorGroupToChar(dbag)); } dbc = ensDatabaseadaptorGetDatabaseconnection(dba); ensDatabaseconnectionFetchUrl(dbc, &dbcurl); if(outf) ajFmtPrintF(outf, "%S\n", dbname); ajFmtPrintF(cachef, "DBNAME %S [\n", dbname); ajFmtPrintF(cachef, " release: \"%s\"\n", ensSoftwareGetVersion()); ajFmtPrintF(cachef, " server: \"%S\"\n", svrname); ajFmtPrintF(cachef, " url: \"%S\"\n", dbcurl); ajFmtPrintF(cachef, "]\n"); ajFmtPrintF(cachef, "\n"); if(dbag != ensEDatabaseadaptorGroupCore) continue; ensRegistryAliasFetchAllbySpecies( ensDatabaseadaptorGetSpecies(dba), aliases); /* ** Format all aliases to lower case, ** sort them alphabetically and remove duplicates. */ iterator = ajListIterNew(aliases); while(!ajListIterDone(iterator)) { alias = ajListstrIterGet(iterator); ajStrFmtLower(&alias); } ajListIterDel(&iterator); ajListSortUnique(aliases, cacheensembl_stringcompare, cacheensembl_stringdelete); alias = NULL; if(ajListGetLength(aliases) > 0) { while(ajListstrPop(aliases, &alias)) { /* ** Reject any aliases with other than alpha-numeric ** characters like white space. */ if(ajStrIsAlnum(alias)) ajFmtPrintF(cachef, "ALIAS %S %S\n", alias, ensDatabaseadaptorGetSpecies(dba)); ajStrDel(&alias); } ajFmtPrintF(cachef, "\n"); } /* Ensembl Database Adaptor objects *must not* be deleted. */ } ajStrDel(&spname); } ajListstrFree(&aliases); ajListFree(&dbas); ajStrDel(&dbcurl); ajStrDel(&svrurl); ajStrDel(&dbname); ajStrDel(&svrname); ajFileClose(&outf); ajFileClose(&cachef); embExit(); return EXIT_SUCCESS; }