/*
** btwisted_getdinucdata
**
** Reads a whitespace-delimited data file and stores the first two fields
** of every line in a string table created with ajTablestrNewCase().
** Lines whose first character is '#' are skipped.
**
** inf     open input file, read until ajReadlineTrim() reports the end
** returns the new table; the key and value strings are allocated here and
**         handed to the table
*/
static AjPTable btwisted_getdinucdata(AjPFile inf)
{
    AjPStr valstr   = NULL;
    AjPStr key      = NULL;
    AjPStr line     = NULL;
    AjPStrTok token = NULL;
    AjPTable table  = NULL;

    /*
    ** No value string is created up front: a fresh one is made for every
    ** data line below, so an initial allocation would only be overwritten
    ** without ever being released.
    */
    line  = ajStrNew();
    table = ajTablestrNewCase(20);

    while(ajReadlineTrim(inf,&line))
    {
        /* comment line */
        if(*ajStrGetPtr(line)=='#')
            continue;

        token = ajStrTokenNewC(line," \n\t\r");

        /* new strings for each entry: the table stores these pointers */
        key = ajStrNew();
        ajStrTokenNextParseC(&token," \n\t\r",&key);

        valstr = ajStrNew();
        ajStrTokenNextParseC(&token," \n\t\r",&valstr);

        ajTablePut(table,(void *)key,(void *) valstr);

        ajStrTokenDel(&token);
    }

    ajStrDel(&line);

    return table;
}
/*
** cutgextract_readcodons
**
** Reads one line of whitespace-separated integers from inf and adds them
** to the running totals in count[], after remapping each input column to
** its slot through the cutidx[] table.
**
** inf         input file positioned at a line of codon counts
** allrecords  when false, a record with more than one stop is rejected
** count       running totals, indexed by slot; updated in place
** returns     the sum of the last three input columns (accumulated as
**             stops), or -1 when the record was rejected and count[] was
**             left untouched
*/
static ajint cutgextract_readcodons(AjPFile inf, AjBool allrecords,
                                    ajint *count)
{
    /* input column -> slot in the count array */
    static int cutidx[] =
    {
        42,43,46,41,45,44,26,30,31,29,27,28,48,51,47,50,
        52,49,55,56,53,54,36,38,35,37, 4, 6, 3, 5,17,18,
        16,15,57,59,60,58,24,25,34,33,39,40,20,19,11,12,
        10, 9,63,62, 8, 7,14,13,21,23,22,32,61, 1, 0, 2
    };

    AjPStr record    = ajStrNew();
    AjPStr field     = ajStrNew();
    AjPStrTok fields = NULL;
    ajint parsed[64];
    ajint pos;
    ajint num   = 0;
    ajint stops = 0;

    if(!ajReadlineTrim(inf,&record))
        ajFatal("Premature end of file");

    fields = ajStrTokenNewC(record," \n\t\r");

    pos = 0;
    while(pos < CODONS)
    {
        ajStrTokenNextParseC(&fields," \n\t\r",&field);
        ajStrToInt(field,&num);

        parsed[cutidx[pos]] = num;

        /* columns from 61 onwards are summed as stops */
        if(pos >= 61)
            stops += num;

        pos++;
    }

    ajStrDel(&record);
    ajStrDel(&field);
    ajStrTokenDel(&fields);

    /* reject before touching the caller's totals */
    if(!allrecords && stops > 1)
        return -1;

    for(pos = 0; pos < CODONS; pos++)
        count[pos] = count[pos] + parsed[pos];

    return stops;
}
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable, AjBool html, const AjPStr enzymes, AjBool blunt, AjBool sticky, ajuint sitelen, AjBool commercial, AjBool ambiguity, AjBool limit, const AjPTable retable) { /* for iterating over hittable */ PValue value; void **keyarray = NULL; /* array for table */ void **valarray = NULL; /* array for table */ ajint i; /* list of enzymes that cut */ AjPList cutlist; AjIList citer; /* iterator for cutlist */ AjPStr cutname = NULL; AjBool found; /* for parsing value->iso string */ AjPStrTok tok; char tokens[] = " ,"; AjPStr code = NULL; const char *p; /* for reading in enzymes names */ AjPFile enzfile = NULL; AjPStr *ea; ajint ne; /* number of enzymes */ AjBool isall = ajTrue; /* list of enzymes that don't cut */ AjPList nocutlist; AjIList niter; /* iterator for nocutlist */ AjPStr nocutname = NULL; /* count of rejected enzymes not matching criteria */ ajint rejected_count = 0; EmbPPatRestrict enz; /* for renaming preferred isoschizomers */ AjPList newlist; /* ** ** Make a list of enzymes('cutlist') that hit ** including the isoschizomer names ** */ ajDebug("Make a list of all enzymes that cut\n"); cutlist = ajListstrNew(); nocutlist = ajListstrNew(); ajTableToarrayKeysValues(hittable, &keyarray, &valarray); for(i = 0; keyarray[i]; i++) { value = (PValue) valarray[i]; cutname = ajStrNew(); ajStrAssignRef(&cutname, keyarray[i]); ajListstrPushAppend(cutlist, cutname); /* Add to cutlist all isoschizomers of enzymes that cut */ ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n"); /* start token to parse isoschizomers names */ tok = ajStrTokenNewC(value->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { cutname = ajStrNew(); ajStrAssignS(&cutname, code); ajListstrPushAppend(cutlist, cutname); } ajStrTokenDel(&tok); } ajStrDel(&code); AJFREE(keyarray); AJFREE(valarray); /* ** Read in list of enzymes ('nocutlist') - either all or ** the input enzyme list. 
** Exclude those that don't match the selection criteria - count these. */ ajDebug("Read in a list of all input enzyme names\n"); ne = 0; if(!enzymes) isall = ajTrue; else { /* get input list of enzymes into ea[] */ ne = ajArrCommaList(enzymes, &ea); if(ajStrMatchCaseC(ea[0], "all")) isall = ajTrue; else { isall = ajFalse; for(i=0; i<ne; ++i) ajStrRemoveWhite(&ea[i]); } } enzfile = ajDatafileNewInNameC(ENZDATA); /* push all enzyme names without the required criteria onto nocutlist */ enz = embPatRestrictNew(); while(!ajFileIsEof(enzfile)) { if(!embPatRestrictReadEntry(enz, enzfile)) continue; /* ** If user entered explicit enzyme list, then check to see if ** this is one of that explicit list */ if(!isall) { found = AJFALSE; for(i=0; i<ne; ++i) if(ajStrMatchCaseS(ea[i], enz->cod)) { found = AJTRUE; break; } if(!found) /* not in the explicit list */ continue; ajDebug("RE %S is in the input explicit list of REs\n", enz->cod); } /* ignore ncuts==0 as they are unknown */ if(!enz->ncuts) { /* number of cut positions */ ajDebug("RE %S has an unknown number of cut positions\n", enz->cod); continue; } ajDebug("RE %S has a known number of cut sites\n", enz->cod); if(enz->len < sitelen) { /* recognition site length */ ajDebug("RE %S does not have a long enough recognition site\n", enz->cod); rejected_count++; continue; } if(!blunt && enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is blunt\n", enz->cod); rejected_count++; continue; } if(!sticky && !enz->blunt) { /* blunt/sticky */ ajDebug("RE %S is sticky\n", enz->cod); rejected_count++; continue; } /* commercially available enzymes have uppercase patterns */ p = ajStrGetPtr(enz->pat); /* ** The -commercial qualifier is only used if we are searching ** through 'all' of the REBASE database - if we have specified an ** explicit list of enzymes then they are searched for whether or ** not they are commercially available */ if((*p >= 'a' && *p <= 'z') && commercial && isall) { ajDebug("RE %S is not commercial\n", enz->cod); 
rejected_count++; continue; } if(!ambiguity && remap_Ambiguous(enz->pat)) { ajDebug("RE %S is ambiguous\n", enz->cod); rejected_count++; continue; } ajDebug("RE %S matches all required criteria\n", enz->cod); code = ajStrNew(); ajStrAssignS(&code, enz->cod); ajListstrPushAppend(nocutlist, code); } embPatRestrictDel(&enz); ajFileClose(&enzfile); for(i=0; i<ne; ++i) if(ea[i]) ajStrDel(&ea[i]); if(ne) AJFREE(ea); /* ** Change names of enzymes in the non-cutter list ** to that of preferred (prototype) ** enzyme name so that the isoschizomers of cutters ** will be removed from the ** non-cutter list in the next bit. ** Remove duplicate prototype names. */ if(limit) { newlist = ajListstrNew(); remap_RenamePreferred(nocutlist, retable, newlist); ajListstrFreeData(&nocutlist); nocutlist = newlist; ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel); } /* ** Iterate through the list of input enzymes removing those that are in ** the cutlist. */ ajDebug("Remove from the nocutlist all enzymes and isoschizomers " "that cut\n"); /* ** This steps down both lists at the same time, comparing names and ** iterating to the next name in whichever list whose name compares ** alphabetically before the other. Where a match is found, the ** nocutlist item is deleted. 
*/ ajListSort(nocutlist, remap_cmpcase); ajListSort(cutlist, remap_cmpcase); citer = ajListIterNewread(cutlist); niter = ajListIterNew(nocutlist); /* while((cutname = (AjPStr)ajListIterGet(citer)) != NULL) ajDebug("dbg cutname = %S\n", cutname); */ nocutname = (AjPStr)ajListIterGet(niter); cutname = (AjPStr)ajListIterGet(citer); ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname); while(nocutname != NULL && cutname != NULL) { i = ajStrCmpCaseS(cutname, nocutname); ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname); ajDebug("ajStrCmpCase=%d\n", i); if(i == 0) { /* match - so remove from nocutlist */ ajDebug("ajListstrRemove %S\n", nocutname); ajListstrIterRemove(niter); nocutname = (AjPStr)ajListIterGet(niter); /* ** Don't increment the cutname list pointer here ** - there may be more than one entry in the nocutname ** list with the same name because we have converted ** isoschizomers to their preferred name */ /* cutname = (AjPStr)ajListIterGet(citer); */ } else if(i == -1) /* cutlist name sorts before nocutlist name */ cutname = (AjPStr)ajListIterGet(citer); else if(i == 1) /* nocutlist name sorts before cutlist name */ nocutname = (AjPStr)ajListIterGet(niter); } ajListIterDel(&citer); ajListIterDel(&niter); ajListstrFreeData(&cutlist); /* Print the resulting list of those that do not cut*/ ajDebug("Print out the list\n"); /* print the title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(html) ajFmtPrintF(outfile, "<PRE>"); /* ajListSort(nocutlist, ajStrVcmp);*/ niter = ajListIterNewread(nocutlist); i = 0; while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL) { ajFmtPrintF(outfile, "%-10S", nocutname); /* new line after every 7 names printed */ if(i++ == 7) { ajFmtPrintF(outfile, "\n"); i = 0; } } ajListIterDel(&niter); /* end the output */ ajFmtPrintF(outfile, "\n"); if(html) {ajFmtPrintF(outfile, "</PRE>\n");} /* ** Print the 
count of rejected enzymes ** N.B. This is the count of ALL rejected enzymes including all ** isoschizomers */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# No. of cutting enzymes which do not match the\n" "# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); ajFmtPrintF(outfile, "%d\n", rejected_count); ajDebug("Tidy up\n"); ajListstrFreeData(&nocutlist); ajListstrFreeData(&cutlist); return; }
static void remap_RestrictPreferred(const AjPList l, const AjPTable t) { AjIList iter = NULL; EmbPMatMatch m = NULL; const AjPStr value = NULL; AjPStr newiso = NULL; AjBool found; /* name found in isoschizomer list */ /* for parsing value->iso string */ AjPStrTok tok = NULL; char tokens[] = " ,"; AjPStr code = NULL; iter = ajListIterNewread(l); while((m = (EmbPMatMatch)ajListIterGet(iter))) { found = ajFalse; /* get prototype name */ value = ajTableFetchS(t, m->cod); if(value) { ajStrAssignC(&newiso, ""); /* parse isoschizomer names from m->iso */ ajStrTokenDel(&tok); tok = ajStrTokenNewC(m->iso, tokens); while(ajStrTokenNextParseC(&tok, tokens, &code)) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); /* found the prototype name? */ if(!ajStrCmpCaseS(code, value)) { ajStrAppendS(&newiso, m->cod); found = ajTrue; } else ajStrAppendS(&newiso, code); } ajStrTokenDel(&tok); /* if the name was not replaced, then add it in now */ if(!found) { if(ajStrGetLen(newiso) > 0) ajStrAppendC(&newiso, ","); ajStrAppendS(&newiso, m->cod); } ajDebug("RE: %S -> %S iso=%S newiso=%S\n", m->cod, value, m->iso, newiso); /* replace the old iso string with the new one */ ajStrAssignS(&m->iso, newiso); /* rename the enzyme to the prototype name */ ajStrAssignS(&m->cod, value); } } ajListIterDel(&iter); ajStrDel(&newiso); ajStrDel(&code); ajStrTokenDel(&tok); return; }
int main(int argc, char *argv[]) { embInitPV("gseqinfo", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; AjPStr tmp = NULL; AjPStr parse = NULL; AjPStr numA = NULL; AjPStr numT = NULL; AjPStr numG = NULL; AjPStr numC = NULL; AjPStrTok handle = NULL; ajint n; char *in0; char *result; AjBool show = 0; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); outf = ajAcdGetOutfile("outfile"); while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if(soap_call_ns1__seqinfo( &soap, NULL, NULL, in0, &result ) == SOAP_OK) { tmp = ajStrNewC(result); ajStrExchangeCC(&tmp, "<", "\n"); ajStrExchangeCC(&tmp, ">", "\n"); handle = ajStrTokenNewC(tmp, "\n"); while(ajStrTokenNextParse(handle, &parse)) { if(ajStrIsInt(parse)) if(!numA) numA = ajStrNewS(parse); else if(!numT) numT = ajStrNewS(parse); else if(!numG) numG = ajStrNewS(parse); else if(!numC) numC = ajStrNewS(parse); } if(show) ajFmtPrint("Sequence: %S A: %S T: %S G: %S C: %S\n", seqid, numA, numT, numG, numC); else ajFmtPrintF(outf, "Sequence: %S A: %S T: %S G: %S C: %S\n", seqid, numA, numT, numG, numC); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&seqid); embExit(); return 0; }
static AjBool extractfeat_MatchFeature(const AjPFeature gf, const AjPStr source, const AjPStr type, ajint sense, AjBool testscore, float minscore, float maxscore, const AjPStr tag, const AjPStr value, AjBool *tagsmatch) { AjPStrTok tokens = NULL; AjPStr key = NULL; AjBool val = ajFalse; /* ** is this a child of a join() ? ** if it is a child, then we use the previous result of MatchPatternTags */ if(!ajFeatIsMultiple(gf) || !ajFeatIsChild(gf)) *tagsmatch = extractfeat_MatchPatternTags(gf, tag, value); /* ignore remote IDs */ /* ** No - don't reject remote IDs here. We want to include all features ** in this routine so that we can reject a whole join later on if any one ** member of the join is remote. ** if(!ajFeatIsLocal(gf)) ** return ajFalse; */ /* check source, type, sense, score, tags, values */ /* Special values indication that we match anything: ** for strings, '*' ** for sense, 0 ** for score, maxscore <= minscore */ ajDebug("extractfeat_MatchFeature\n"); ajDebug("embMiscMatchPatternDelim(ajFeatGetSource(gf), source) %B\n", embMiscMatchPatternDelimC(ajFeatGetSource(gf), source, ",;|")); ajDebug("ajFeatTypeMatchS(gf, type) %B\n", ajFeatTypeMatchWildS(gf, type)); ajDebug("ajFeatGetStrand(gf) '%x' sense %d\n", ajFeatGetStrand(gf), sense); ajDebug("testscore: %B ajFeatGetScore(gf): %f minscore:%f maxscore:%f\n", testscore, ajFeatGetScore(gf), minscore, maxscore); if(!embMiscMatchPatternDelimC(ajFeatGetSource(gf), source, ",;|")) return ajFalse; if(ajStrGetLen(type)) { val = ajFalse; tokens = ajStrTokenNewC(type, " \t\n\r,;|"); while (ajStrTokenNextParse( &tokens, &key)) { if (ajFeatTypeMatchWildS(gf, key)) { val = ajTrue; break; } } ajStrTokenDel( &tokens); ajStrDel(&key); if(!val) return ajFalse; } if(ajFeatGetStrand(gf) == '+' && sense == -1) return ajFalse; if(ajFeatGetStrand(gf) == '-' && sense == +1) return ajFalse; if(testscore && ajFeatGetScore(gf) < minscore) return ajFalse; if(testscore && ajFeatGetScore(gf) > maxscore) return ajFalse; if(!*tagsmatch) 
return ajFalse; ajDebug("All tests passed, return ajTrue\n"); /*ajUser("All tests passed, return ajTrue");*/ return ajTrue; }
int main(int argc, char *argv[]) { embInitPV("ggcsi", argc, argv, "GEMBASSY", "1.0.1"); struct soap soap; struct ns1__gcsiInputParams params; AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjPStr seqid = NULL; ajint window = 0; AjBool at = 0; AjBool purine = 0; AjBool keto = 0; AjBool pval = 0; AjPStr version = NULL; AjBool accid = ajFalse; AjPStr tmp = NULL; AjPStr parse = NULL; AjPStr gcsi = NULL; AjPStr sa = NULL; AjPStr dist = NULL; AjPStr z = NULL; AjPStr p = NULL; AjPStrTok handle = NULL; char *in0; char *result; AjPFile outf = NULL; seqall = ajAcdGetSeqall("sequence"); window = ajAcdGetInt("window"); at = ajAcdGetBoolean("at"); purine = ajAcdGetBoolean("purine"); keto = ajAcdGetBoolean("keto"); pval = ajAcdGetBoolean("pval"); version = ajAcdGetSelectSingle("gcsi"); accid = ajAcdGetBoolean("accid"); outf = ajAcdGetOutfile("outfile"); params.window = window; params.at = 0; params.purine = 0; params.keto = 0; params.p = 0; ajStrToInt(version, &(params.version)); if(at) params.at = 1; if(purine) params.purine = 1; if(keto) params.keto = 1; if(pval) params.p = 1; while(ajSeqallNext(seqall, &seq)) { soap_init(&soap); inseq = NULL; ajStrAppendC(&inseq, ">"); ajStrAppendS(&inseq, ajSeqGetNameS(seq)); ajStrAppendC(&inseq, "\n"); ajStrAppendS(&inseq, ajSeqGetSeqS(seq)); ajStrAssignS(&seqid, ajSeqGetAccS(seq)); in0 = ajCharNewS(inseq); if (soap_call_ns1__gcsi( &soap, NULL, NULL, in0, ¶ms, &result ) == SOAP_OK) { tmp = ajStrNew(); parse = ajStrNew(); gcsi = ajStrNew(); sa = ajStrNew(); dist = ajStrNew(); z = ajStrNew(); p = ajStrNew(); ajStrAssignC(&tmp, result); ajStrExchangeCC(&tmp, "<", "\n"); ajStrExchangeCC(&tmp, ">", "\n"); handle = ajStrTokenNewC(tmp, "\n"); while (ajStrTokenNextParse(&handle, &parse)) { if (ajStrIsFloat(parse)) { if(!ajStrGetLen(gcsi)) ajStrAssignS(&gcsi, parse); else if(!ajStrGetLen(sa)) ajStrAssignS(&sa, parse); else if(!ajStrGetLen(dist)) ajStrAssignS(&dist, parse); else if(!ajStrGetLen(z)) ajStrAssignS(&z, parse); else 
if(!ajStrGetLen(p)) ajStrAssignS(&p, parse); } } tmp = ajFmtStr("Sequence: %S GCSI: %S SA: %S DIST: %S", seqid, gcsi, sa, dist); if(pval) tmp = ajFmtStr("%S Z: %S P: %S", tmp, z, p); ajFmtPrintF(outf, "%S\n", tmp); ajStrDel(&tmp); ajStrDel(&parse); ajStrDel(&gcsi); ajStrDel(&sa); ajStrDel(&dist); ajStrDel(&z); ajStrDel(&p); } else { soap_print_fault(&soap, stderr); } soap_destroy(&soap); soap_end(&soap); soap_done(&soap); AJFREE(in0); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&seqid); embExit(); return 0; }
/*
** primersearch_read_primers
**
** Reads primer pairs from primerFile, one per line in the form
** "name forward reverse" (space or tab separated), and appends a Primer
** record for every usable pair to *primerList. Lines starting with '#'
** and lines ending in ".." are skipped. Pairs rejected by
** primersearch_classify_and_compile() are reported with ajUser() and
** discarded.
**
** primerList  list receiving the Primer records
** primerFile  open input file
** mmp         allowed mismatch percentage; the per-primer mismatch count
**             is pattern length * mmp / 100
*/
static void primersearch_read_primers(AjPList *primerList, AjPFile primerFile,
                                      ajint mmp)
{
    AjPStr rdline    = NULL;
    AjPStrTok handle = NULL;
    Primer primdata  = NULL;

    while(ajReadlineTrim(primerFile, &rdline))
    {
        primdata = NULL;

        if(ajStrGetCharFirst(rdline) == '#')
            continue;

        if(ajStrSuffixC(rdline, ".."))
            continue;

        AJNEW(primdata);
        primdata->Name = NULL;

        primersearch_initialise_pguts(&primdata->forward);
        primersearch_initialise_pguts(&primdata->reverse);

        primdata->hitlist = ajListNew();

        /* name, forward pattern, reverse pattern */
        handle = ajStrTokenNewC(rdline, " \t");
        ajStrTokenNextParse(&handle, &primdata->Name);

        ajStrTokenNextParse(&handle, &primdata->forward->patstr);
        ajStrFmtUpper(&primdata->forward->patstr);
        ajStrTokenNextParse(&handle, &primdata->reverse->patstr);
        ajStrFmtUpper(&primdata->reverse->patstr);
        ajStrTokenDel(&handle);

        /* copy patterns for Henry Spencer code */
        ajStrAssignC(&primdata->forward->origpat,
                     ajStrGetPtr(primdata->forward->patstr));
        ajStrAssignC(&primdata->reverse->origpat,
                     ajStrGetPtr(primdata->reverse->patstr));

        /* set the mismatch level */
        primdata->forward->mm =
            (ajint) (ajStrGetLen(primdata->forward->patstr)*mmp)/100;
        primdata->reverse->mm =
            (ajint) (ajStrGetLen(primdata->reverse->patstr)*mmp)/100;

        if(primersearch_classify_and_compile(&primdata))
        {
            ajListPushAppend(*primerList, primdata);
        }
        else
        {
            /* there was something funny about the primer sequences */
            ajUser("Cannot use %s\n", ajStrGetPtr(primdata->Name));

            /* release everything allocated for this pair, each once */
            primersearch_free_pguts(&primdata->forward);
            primersearch_free_pguts(&primdata->reverse);
            ajStrDel(&primdata->Name);
            ajListFree(&primdata->hitlist);
            AJFREE(primdata);
        }
    }

    ajStrDel(&rdline);

    return;
}
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); 
ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { 
ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
/*
** eprimer3_report
**
** Splits the text in 'output' into lines and groups them into
** per-sequence records. A record starts at a line beginning with
** "PRIMER_SEQUENCE_ID=" and ends at a line consisting of "=". Every line
** of a record is split at '=' into a key and a value and stored in a
** table; when the record ends the table is passed to
** eprimer3_output_report() and released. Lines outside a record are
** ignored.
**
** outfile    report file, passed through to eprimer3_output_report()
** output     text to parse
** numreturn  passed through to eprimer3_output_report()
** begin      passed through to eprimer3_output_report()
*/
static void eprimer3_report(AjPFile outfile, const AjPStr output,
                            ajint numreturn, ajint begin)
{
    AjPStr line = NULL;
    AjPStrTok linetokenhandle = NULL;
    char eol[] = "\n\r";
    AjPStrTok keytokenhandle = NULL;
    char equals[] = "=";
    AjPStr key   = NULL;
    AjPStr value = NULL;
    AjBool gotsequenceid = AJFALSE;

    /*
    ** Must start as NULL: the final ajTablestrFree() below also runs
    ** when no record was ever started.
    */
    AjPTable table = NULL;

    linetokenhandle = ajStrTokenNewC(output, eol);

    /* get next line of relevant results */
    while(ajStrTokenNextParseC(&linetokenhandle, eol, &line))
    {
        if(!gotsequenceid)
        {
            /*
            ** At the start of another sequence's results?
            ** Start storing the results in the table.
            */
            if(ajStrCmpLenC(line, "PRIMER_SEQUENCE_ID=", 19) == 0)
            {
                gotsequenceid = AJTRUE;
                table = ajTablestrNew(TABLEGUESS);
            }
            else
                continue;
        }
        else
        {
            /*
            ** At the end of this sequence? - marked by a '=' in the primer3
            ** output - then output the results.
            */
            if(ajStrCmpC(line, "=") == 0)
            {
                gotsequenceid = AJFALSE;
                eprimer3_output_report(outfile, table, numreturn, begin);
                ajTablestrFree(&table);
                continue;
            }
        }

        /*
        ** store key and value in table and parse values
        ** when have all of the sequence
        ** results in the table because the LEFT, RIGHT
        ** and INTERNAL results for each
        ** resulting primer are interleaved
        */
        keytokenhandle = ajStrTokenNewC(line, equals);

        key = ajStrNew();
        ajStrTokenNextParse(&keytokenhandle, &key);

        value = ajStrNew();
        ajStrTokenNextParse(&keytokenhandle, &value);

        ajDebug("key=%S\tvalue=%S\n", key, value);

        ajTablePut(table,(void *)key, (void *)value);

        ajStrTokenDel(&keytokenhandle);
    }

    ajStrDel(&line);
    ajStrTokenDel(&linetokenhandle);

    /* releases a record that was started but never terminated */
    ajTablestrFree(&table);

    return;
}
int main(int argc, char **argv) { AjPSeqall seqall; AjPFile primfile; AjPStr rdline = NULL; Primer primdata; AjPStrTok handle = NULL; AjPList primList = NULL; embInit("stssearch", argc, argv); primfile = ajAcdGetInfile("infile"); out = ajAcdGetOutfile("outfile"); seqall = ajAcdGetSeqall("seqall"); while(ajReadlineTrim(primfile, &rdline)) { if(ajStrGetCharFirst(rdline) == '#') continue; if(ajStrSuffixC(rdline, "..")) continue; AJNEW(primdata); primdata->Name = NULL; primdata->Oligoa = NULL; primdata->Oligob = NULL; handle = ajStrTokenNewC(rdline, " \t"); ajStrTokenNextParse(&handle, &primdata->Name); if(!(nprimers % 1000)) ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name); ajStrTokenNextParse(&handle, &primdata->Oligoa); ajStrFmtUpper(&primdata->Oligoa); primdata->Prima = ajRegComp(primdata->Oligoa); ajStrTokenNextParse(&handle, &primdata->Oligob); ajStrFmtUpper(&primdata->Oligob); primdata->Primb = ajRegComp(primdata->Oligob); ajStrTokenDel(&handle); if(!nprimers) primList = ajListNew(); ajListPushAppend(primList, primdata); nprimers++; } if(!nprimers) ajFatal("No primers read\n"); ajDebug("%d primers read\n", nprimers); while(ajSeqallNext(seqall, &seq)) { ajSeqFmtUpper(seq); ajStrAssignS(&seqstr, ajSeqGetSeqS(seq)); ajStrAssignS(&revstr, ajSeqGetSeqS(seq)); ajSeqstrReverse(&revstr); ajDebug("Testing: %s\n", ajSeqGetNameC(seq)); ntests = 0; ajListMap(primList, stssearch_primTest, NULL); } ajFileClose(&out); ajSeqallDel(&seqall); ajSeqDel(&seq); ajFileClose(&out); ajStrDel(&revstr); ajStrDel(&seqstr); ajFileClose(&primfile); ajListMap(primList, stssearch_primDel, NULL); ajListFree(&primList); ajStrDel(&rdline); embExit(); return 0; }
/*
** cutgextract_next
**
** Advances through inf to the next header line (first character '>')
** whose sixth backslash-separated field matches wildspecies.
**
** inf          input file
** wildspecies  wildcard pattern tested against the sixth header field
** pspecies     set to the sixth header field of the last header read
** pdoc         set to the seven header fields of the matching record,
**              one per line, prefixed "#ID ", "#AC ", "#FT ", "#FL ",
**              "#PI ", "#OS " and "#DE "
** returns      pointer into the file-level string cutgextractOrg, holding
**              "E" followed by the sixth field with '/', ' ', '.' and '\''
**              replaced by '_' and one trailing '_' removed; NULL when the
**              end of the file is reached first
**
** The fifth field is also copied to the file-level cutgextractSavepid.
*/
static char* cutgextract_next(AjPFile inf, const AjPStr wildspecies,
                              AjPStr* pspecies, AjPStr* pdoc)
{
    /* documentation prefix for each of the seven header fields */
    static const char *doctags[] =
    {
        "#ID ", "#AC ", "#FT ", "#FL ", "#PI ", "#OS ", "#DE "
    };

    AjPStrTok handle = NULL;
    AjPStr token     = NULL;
    ajint i;
    ajint len;
    char *p = NULL;
    char c;
    AjBool done = ajFalse;

    if(!cutgextractLine)
        cutgextractLine = ajStrNew();

    if(!cutgextractOrg)
        cutgextractOrg = ajStrNew();

    ajStrAssignC(&cutgextractLine,"");

    while(!done)
    {
        /*
        ** Start the documentation afresh for every header examined, so
        ** that records skipped by the species filter do not leak into
        ** the documentation of the record finally returned.
        */
        ajStrAssignC(pdoc,"");

        /* skip forward to the next header line */
        while(ajStrGetCharFirst(cutgextractLine) != '>')
            if(!ajReadlineTrim(inf,&cutgextractLine))
                return NULL;

        handle = ajStrTokenNewC(cutgextractLine,"\\\n\t\r");

        for(i=0;i<7;++i)
        {
            ajStrTokenNextParseC(&handle,"\\\n\t\r",&token);

            ajStrAppendC(pdoc, doctags[i]);
            ajStrAppendS(pdoc, token);
            ajStrAppendC(pdoc, "\n");

            if(i==4)
                ajStrAssignS(&cutgextractSavepid, token);
            else if(i==5)
            {
                ajStrAssignC(&cutgextractOrg,"E");
                ajStrAppendS(&cutgextractOrg, token);
                ajStrAssignS(pspecies, token);

                if(ajStrMatchWildS(token,wildspecies))
                    done = ajTrue;
            }
        }

        ajStrTokenDel(&handle);
        ajStrDel(&token);

        /* not wanted: move off this header so the scan can continue */
        if(!done)
            if(!ajReadlineTrim(inf,&cutgextractLine))
                return NULL;
    }

    len = ajStrGetLen(cutgextractOrg);
    p   = ajStrGetuniquePtr(&cutgextractOrg);

    /* replace awkward characters with underscores */
    for(i=0;i<len;++i)
    {
        c = p[i];

        if(c=='/' || c==' ' || c=='.' || c=='\'')
            p[i]='_';
    }

    /* drop one trailing underscore; guarded against an empty string */
    if(len > 0 && p[len-1]=='_')
        p[len-1]='\0';

    ajStrDel(&token);
    ajStrTokenDel(&handle);

    return p;
}
int main(int argc, char **argv) { /* Variable declarations */ AjPStr query; AjPOutfile outfile = NULL; AjPResource resource = NULL; AjPResourcein resourcein = NULL; AjPOboin oboin = NULL; AjPObo obo = NULL; AjPStr oboqry = NULL; AjPStr resourceqry = NULL; AjPStr qrystr = NULL; AjPTable obotable = NULL; AjPTable foundtable = NULL; AjBool subclasses = ajFalse; AjPStrTok handle = NULL; AjPList obolist = NULL; AjPObo obotest = NULL; ajuint i; ajuint imax = 3; const char* fields[] = {"id", "acc", "nam", "des"}; /* ACD processing */ embInit("drfindid", argc, argv); query = ajAcdGetString("query"); outfile = ajAcdGetOutresource("outfile"); /* sensitive = ajAcdGetBoolean("sensitive"); */ subclasses = ajAcdGetBoolean("subclasses"); resourcein = ajResourceinNew(); resource = ajResourceNew(); oboin = ajOboinNew(); obo = ajOboNew(); obolist = ajListNew(); obotable = ajTablestrNew(600); foundtable = ajTablestrNew(600); handle = ajStrTokenNewC(query, ","); while(ajStrTokenNextParse(&handle, &qrystr)) { for(i=0; i<imax; i++) { ajFmtPrintS(&oboqry, "edam-%s:%S", fields[i], qrystr); ajOboinQryS(oboin, oboqry); while(ajOboinRead(oboin, obo)) { ajListPushAppend(obolist, ajOboNewObo(obo)); if(subclasses) ajOboGetTree(obo, obolist); ajDebug("%S '%S' %u\n", qrystr, obo->Id, ajListGetLength(obolist)); while(ajListGetLength(obolist)) { ajListPop(obolist, (void**) &obotest); if(!ajTableMatchS(obotable, obotest->Id)) { ajDebug("edam %s '%S' namespace '%S' name '%S'\n", fields[i], obotest->Id, obotest->Namespace, obotest->Name); ajTablePut(obotable, ajStrNewS(obotest->Id), (void *) 1); ajFmtPrintS(&resourceqry, "drcat-eid:%S", ajOboGetId(obotest)); ajResourceinQryS(resourcein, resourceqry); while(ajResourceinRead(resourcein, resource)) { if(!ajTableMatchS(foundtable, resource->Id)) { ajDebug("drcat id '%S' category '%S'\n", resource->Id, resource->Cat); ajResourceoutWrite(outfile, resource); ajTablePut(foundtable, ajStrNewS(resource->Id), (void *) 1); } } } ajOboDel(&obotest); } } } } /* 
Memory clean-up and exit */ ajOboDel(&obo); ajOboinDel(&oboin); ajResourceDel(&resource); ajResourceinDel(&resourcein); ajListFree(&obolist); ajStrTokenDel(&handle); ajStrDel(&qrystr); ajStrDel(&query); ajStrDel(&oboqry); ajStrDel(&resourceqry); ajTablestrFreeKey(&obotable); ajTablestrFreeKey(&foundtable); ajOutfileClose(&outfile); embExit(); return 0; }