/*
** Build a new regular-expression pattern object and register it with plist.
**
** plist : pattern list that supplies the pattern Type and receives the
**         new entry through ajPatlistAddRegex
** name  : name for the pattern; when empty, a name of the form "regexN"
**         is generated, where N is one more than the current length of
**         plist->Patlist
** pat   : pattern text; an empty pattern yields NULL and plist is untouched
**
** Returns the newly allocated pattern (also added to plist), or NULL when
** pat has zero length.
*/
AjPPatternRegex ajPatternRegexNewList(AjPPatlistRegex plist,
                                      const AjPStr name,
                                      const AjPStr pat)
{
    AjPPatternRegex entry = NULL;

    /* Nothing to compile: refuse to create an entry. */
    if(ajStrGetLen(pat) == 0)
        return NULL;

    AJNEW0(entry);

    if(ajStrGetLen(name) == 0)
        ajFmtPrintS(&entry->Name, "regex%d",
                    1+ajListGetLength(plist->Patlist));
    else
        ajStrAssignS(&entry->Name, name);

    ajStrAssignS(&entry->Pattern, pat);
    entry->Compiled = ajRegComp(entry->Pattern);
    entry->Type     = plist->Type;

    ajPatlistAddRegex(plist, entry);

    return entry;
}
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); 
ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { 
ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
int main(int argc, char **argv) { AjPSeqall seqall; AjPFile primfile; AjPStr rdline = NULL; Primer primdata; AjPStrTok handle = NULL; AjPList primList = NULL; embInit("stssearch", argc, argv); primfile = ajAcdGetInfile("infile"); out = ajAcdGetOutfile("outfile"); seqall = ajAcdGetSeqall("seqall"); while(ajReadlineTrim(primfile, &rdline)) { if(ajStrGetCharFirst(rdline) == '#') continue; if(ajStrSuffixC(rdline, "..")) continue; AJNEW(primdata); primdata->Name = NULL; primdata->Oligoa = NULL; primdata->Oligob = NULL; handle = ajStrTokenNewC(rdline, " \t"); ajStrTokenNextParse(&handle, &primdata->Name); if(!(nprimers % 1000)) ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name); ajStrTokenNextParse(&handle, &primdata->Oligoa); ajStrFmtUpper(&primdata->Oligoa); primdata->Prima = ajRegComp(primdata->Oligoa); ajStrTokenNextParse(&handle, &primdata->Oligob); ajStrFmtUpper(&primdata->Oligob); primdata->Primb = ajRegComp(primdata->Oligob); ajStrTokenDel(&handle); if(!nprimers) primList = ajListNew(); ajListPushAppend(primList, primdata); nprimers++; } if(!nprimers) ajFatal("No primers read\n"); ajDebug("%d primers read\n", nprimers); while(ajSeqallNext(seqall, &seq)) { ajSeqFmtUpper(seq); ajStrAssignS(&seqstr, ajSeqGetSeqS(seq)); ajStrAssignS(&revstr, ajSeqGetSeqS(seq)); ajSeqstrReverse(&revstr); ajDebug("Testing: %s\n", ajSeqGetNameC(seq)); ntests = 0; ajListMap(primList, stssearch_primTest, NULL); } ajFileClose(&out); ajSeqallDel(&seqall); ajSeqDel(&seq); ajFileClose(&out); ajStrDel(&revstr); ajStrDel(&seqstr); ajFileClose(&primfile); ajListMap(primList, stssearch_primDel, NULL); ajListFree(&primList); ajStrDel(&rdline); embExit(); return 0; }