/*
** btwisted_getdinucdata
**
** Reads a two-column data file into a string table created with
** ajTablestrNewCase.
**
** Every line whose first character is not '#' is split on whitespace:
** the first token is stored as the key and the second token as the value.
** The key and value strings are newly allocated for each line and are
** handed to the table; the caller receives the table and is responsible
** for releasing it.
**
** inf     [AjPFile]  open input file, read until ajReadlineTrim fails
** returns [AjPTable] the populated table
*/
static AjPTable btwisted_getdinucdata(AjPFile inf)
{
    AjPStr valstr   = NULL;
    AjPStr key      = NULL;
    AjPStr line     = NULL;
    AjPStrTok token = NULL;
    AjPTable table  = NULL;

    /*
    ** Only the line buffer is allocated up front.  The value string is
    ** allocated per record inside the loop; allocating one here as well
    ** would leak it as soon as the first record overwrote the pointer.
    */
    line  = ajStrNew();
    table = ajTablestrNewCase(20);

    while(ajReadlineTrim(inf,&line))
    {
        /* comment line */
        if(*ajStrGetPtr(line)=='#')
            continue;

        token = ajStrTokenNewC(line," \n\t\r");

        /* fresh strings for every record: the table keeps the pointers */
        key = ajStrNew();
        ajStrTokenNextParseC(&token," \n\t\r",&key);

        valstr = ajStrNew();
        ajStrTokenNextParseC(&token," \n\t\r",&valstr);

        ajTablePut(table,(void *)key,(void *) valstr);

        ajStrTokenDel(&token);
    }

    ajStrDel(&line);

    return table;
}
/*
** cutgextract_readcodons
**
** Reads one line of whitespace-separated integer counts from the input
** file and adds them to the running totals in 'count'.
**
** CODONS fields are parsed.  Field number i is stored at position
** cutidx[i] of a local buffer, so the input column order is remapped
** through the cutidx table.  The values of the fields with index above 60
** are summed into the stop total.
**
** inf        [AjPFile] input file; ajFatal is called if no line can be read
** allrecords [AjBool]  if false, a record whose stop total exceeds 1 is
**                      rejected and 'count' is left untouched
** count      [ajint*]  running totals, incremented element by element
** returns    [ajint]   the stop total, or -1 for a rejected record
*/
static ajint cutgextract_readcodons(AjPFile inf, AjBool allrecords,
                                    ajint *count)
{
    static int cutidx[] =
    {
        42,43,46,41,45,44,26,30,31,29,27,28,48,51,47,50,
        52,49,55,56,53,54,36,38,35,37, 4, 6, 3, 5,17,18,
        16,15,57,59,60,58,24,25,34,33,39,40,20,19,11,12,
        10, 9,63,62, 8, 7,14,13,21,23,22,32,61, 1, 0, 2
    };

    const char *delim = " \n\t\r";
    AjPStr record     = ajStrNew();
    AjPStr field      = ajStrNew();
    AjPStrTok handle  = NULL;
    ajint parsed[64];
    ajint num     = 0;
    ajint stopsum = 0;
    ajint k;

    if(!ajReadlineTrim(inf,&record))
        ajFatal("Premature end of file");

    handle = ajStrTokenNewC(record, delim);

    for(k = 0; k < CODONS; ++k)
    {
        ajStrTokenNextParseC(&handle, delim, &field);
        ajStrToInt(field, &num);

        /* remap the input column to its slot in the output order */
        parsed[cutidx[k]] = num;

        /* the trailing columns contribute to the stop total */
        if(k > 60)
            stopsum += num;
    }

    ajStrDel(&record);
    ajStrDel(&field);
    ajStrTokenDel(&handle);

    /* reject the record before touching the running totals */
    if(!allrecords && stopsum > 1)
        return -1;

    for(k = 0; k < CODONS; ++k)
        count[k] += parsed[k];

    return stopsum;
}
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. 
** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); 
rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. 
*/ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the 
count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
static void remap_RestrictPreferred(const AjPList l, const AjPTable t) { AjIList iter = NULL; EmbPMatMatch m = NULL; const AjPStr value = NULL; AjPStr newiso = NULL; AjBool found; /* name found in isoschizomer list */ /* for parsing value->iso string */ AjPStrTok tok = NULL; char tokens[] = " ,"; AjPStr code = NULL; iter = ajListIterNewread(l); while((m = (EmbPMatMatch)ajListIterGet(iter))) { found = ajFalse; /* get prototype name */ value = ajTableFetchS(t, m->cod); if(value) { ajStrAssignC(&newiso, ""); /* parse isoschizomer names from m->iso */ ajStrTokenDel(&tok); tok = ajStrTokenNewC(m->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); /* found the prototype name? */ if(!ajStrCmpCaseS(code, value)) { ajStrAppendS(&newiso, m->cod); found = ajTrue; } else ajStrAppendS(&newiso, code); } ajStrTokenDel(&tok); /* if the name was not replaced, then add it in now */ if(!found) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); ajStrAppendS(&newiso, m->cod); } ajDebug("RE: %S -> %S iso=%S newiso=%S\n", m->cod, value, m->iso, newiso); /* replace the old iso string with the new one */ ajStrAssignS(&m->iso, newiso); /* rename the enzyme to the prototype name */ ajStrAssignS(&m->cod, value); } } ajListIterDel(&iter); ajStrDel(&newiso); ajStrDel(&code); ajStrTokenDel(&tok); return; }
/*
** eprimer3_report
**
** Splits the text in 'output' into lines and, for each block of results,
** collects the "key=value" lines in a table and passes that table to
** eprimer3_output_report.
**
** A block starts at a line beginning "PRIMER_SEQUENCE_ID=" (that line is
** itself stored in the table) and ends at a line consisting of "=".
** Lines outside a block are ignored.  A block that is still open when the
** text ends is freed without being reported.
**
** outfile   [AjPFile]      passed through to eprimer3_output_report
** output    [const AjPStr] text to parse
** numreturn [ajint]        passed through to eprimer3_output_report
** begin     [ajint]        passed through to eprimer3_output_report
*/
static void eprimer3_report(AjPFile outfile, const AjPStr output,
                            ajint numreturn, ajint begin)
{
    AjPStr line = NULL;
    AjPStrTok linetokenhandle = NULL;
    char eol[] = "\n\r";
    AjPStrTok keytokenhandle = NULL;
    char equals[] = "=";
    AjPStr key   = NULL;
    AjPStr value = NULL;
    AjBool gotsequenceid = AJFALSE;

    /*
    ** Must start as NULL: if no "PRIMER_SEQUENCE_ID=" line is ever seen,
    ** the final ajTablestrFree would otherwise be handed an
    ** uninitialised pointer.
    */
    AjPTable table = NULL;

    linetokenhandle = ajStrTokenNewC(output, eol);

    /* get next line of relevant results */
    while(ajStrTokenNextParseC(&linetokenhandle, eol, &line))
    {
        if(!gotsequenceid)
        {
            /*
            ** At the start of another sequence's results?
            ** Start storing the results in the table.
            */
            if(ajStrCmpLenC(line, "PRIMER_SEQUENCE_ID=", 19) == 0)
            {
                gotsequenceid = AJTRUE;
                table = ajTablestrNew(TABLEGUESS);
            }
            else
                continue;
        }
        else
        {
            /*
            ** At the end of this sequence? - marked by a '=' in the primer3
            ** output - then output the results.
            */
            if(ajStrCmpC(line, "=") == 0)
            {
                gotsequenceid = AJFALSE;
                eprimer3_output_report(outfile, table, numreturn, begin);
                ajTablestrFree(&table);
                continue;
            }
        }

        /*
        ** store key and value in table and parse values
        ** when have all of the sequence
        ** results in the table because the LEFT, RIGHT
        ** and INTERNAL results for each
        ** resulting primer are interleaved
        */
        keytokenhandle = ajStrTokenNewC(line, equals);

        /* fresh strings per line: the table keeps the pointers */
        key = ajStrNew();
        ajStrTokenNextParse(&keytokenhandle, &key);

        value = ajStrNew();
        ajStrTokenNextParse(&keytokenhandle, &value);

        ajDebug("key=%S\tvalue=%S\n", key, value);

        ajTablePut(table,(void *)key, (void *)value);

        ajStrTokenDel(&keytokenhandle);
    }

    ajStrDel(&line);
    ajStrTokenDel(&linetokenhandle);

    /* releases a block left open at end of input */
    ajTablestrFree(&table);

    return;
}
/*
** cutgextract_next
**
** Advances through the input to the next header line (one starting with
** '>') whose sixth field matches 'wildspecies'.
**
** Each header line is split on backslash, newline, tab and carriage
** return.  Its first seven fields are appended to 'pdoc' as lines tagged
** #ID, #AC, #FT, #FL, #PI, #OS and #DE; 'pdoc' is cleared only once, on
** entry, so the fields of skipped headers stay in it.  The fifth field is
** copied to cutgextractSavepid and the sixth to 'pspecies'.
**
** The return value is built from "E" plus the sixth field, with '/', ' ',
** '.' and '\'' replaced by '_' and one trailing '_' removed.  It points
** into the file-level string cutgextractOrg.
**
** inf         [AjPFile]      input file
** wildspecies [const AjPStr] wildcard pattern tested against field six
** pspecies    [AjPStr*]      receives field six of each header examined
** pdoc        [AjPStr*]      receives the tagged header fields
** returns     [char*]        the name described above, or NULL when the
**                            input runs out before a match is found
*/
static char* cutgextract_next(AjPFile inf, const AjPStr wildspecies,
                              AjPStr* pspecies, AjPStr* pdoc)
{
    /* documentation tag for each of the seven header fields, in order */
    static const char *const doctag[] =
    {
        "#ID ", "#AC ", "#FT ", "#FL ", "#PI ", "#OS ", "#DE "
    };

    const char *seps = "\\\n\t\r";
    AjPStrTok fields = NULL;
    AjPStr field     = NULL;
    ajint k;
    ajint orglen;
    size_t last;
    char *org = NULL;
    AjBool matched = ajFalse;

    if(!cutgextractLine)
        cutgextractLine = ajStrNew();

    if(!cutgextractOrg)
        cutgextractOrg = ajStrNew();

    ajStrAssignC(&cutgextractLine,"");
    ajStrAssignC(pdoc,"");

    while(!matched)
    {
        /* skip forward to a header line */
        while(ajStrGetCharFirst(cutgextractLine) != '>')
            if(!ajReadlineTrim(inf,&cutgextractLine))
                return NULL;

        fields = ajStrTokenNewC(cutgextractLine, seps);

        for(k = 0; k < 7; ++k)
        {
            ajStrTokenNextParseC(&fields, seps, &field);

            /* field six is the species: record it and test the wildcard */
            if(k == 5)
            {
                ajStrAssignC(&cutgextractOrg,"E");
                ajStrAppendS(&cutgextractOrg, field);
                ajStrAssignS(pspecies, field);

                if(ajStrMatchWildS(field,wildspecies))
                    matched = ajTrue;
            }

            ajStrAppendC(pdoc, doctag[k]);
            ajStrAppendS(pdoc, field);
            ajStrAppendC(pdoc, "\n");

            /* field five is kept for later use */
            if(k == 4)
                ajStrAssignS(&cutgextractSavepid, field);
        }

        ajStrTokenDel(&fields);
        ajStrDel(&field);

        /* no match: move past this header before looking again */
        if(!matched && !ajReadlineTrim(inf,&cutgextractLine))
            return NULL;
    }

    orglen = ajStrGetLen(cutgextractOrg);
    org    = ajStrGetuniquePtr(&cutgextractOrg);

    /* replace the listed punctuation characters with underscores */
    for(k = 0; k < orglen; ++k)
    {
        switch(org[k])
        {
            case '/':
            case ' ':
            case '.':
            case '\'':
                org[k] = '_';
                break;
            default:
                break;
        }
    }

    /* drop a single trailing underscore */
    last = strlen(org) - 1;

    if(org[last] == '_')
        org[last] = '\0';

    ajStrDel(&field);
    ajStrTokenDel(&fields);

    return org;
}