/* @func gValID ***************************************************************
**
** Checks whether an accession ID can be resolved by the G-language REST
** service.
**
** The ID counts as valid when "http://rest.g-language.org/<id>" can be
** fetched through gFilebuffURLS(). The body of the reply is not examined.
**
** @param [r] id [AjPStr] Accession ID to look up
** @return [ajint] ajTrue when the URL could be retrieved, ajFalse otherwise
** @@
******************************************************************************/

ajint gValID(AjPStr id)
{
    AjPFilebuff buff = NULL;
    AjPStr      url  = NULL;
    ajint       ret  = ajFalse;

    url = ajStrNew();
    ajFmtPrintS(&url, "http://rest.g-language.org/%S", id);

    if(gFilebuffURLS(url, &buff))
        ret = ajTrue;

    /* Only the success of the request matters; drop the reply itself */
    if(buff)
        ajFilebuffDel(&buff);

    ajStrDel(&url);

    return ret;
}
/* @func gHttpConvertS ********************************************************
**
** Requests a format conversion of a previously computed job result and
** stores the reply in an output file.
**
** Everything following "jobid=" in the given URL is taken as the job
** identifier and passed, together with the input and output format names,
** to the convert.cgi endpoint; the reply is downloaded with gHttpGetBinS().
**
** @param [r] url [AjPStr] URL containing a "jobid=" component
** @param [u] outf [AjPFile*] Output file handed on to gHttpGetBinS()
** @param [r] informat [AjPStr] Name of the current format
** @param [r] outformat [AjPStr] Name of the requested format
** @return [AjBool] ajTrue on success; ajFalse when the URL carries no
**                  job id or the download fails
** @@
******************************************************************************/

AjBool gHttpConvertS(AjPStr url, AjPFile* outf,
                     AjPStr informat, AjPStr outformat)
{
    AjPRegexp regexp  = NULL;
    AjPStr    jobid   = NULL;
    AjPStr    convert = NULL;
    AjBool    ret     = ajFalse;

    regexp = ajRegCompC("^.+jobid=");

    if(ajRegExec(regexp, url) && ajRegPost(regexp, &jobid))
    {
        convert = ajFmtStr("http://soap.g-language.org/WS/convert.cgi?"
                           "jobid=%S&informat=%S&outformat=%S",
                           jobid, informat, outformat);

        if(gHttpGetBinS(convert, outf))
            ret = ajTrue;
    }

    /* Release the temporaries on every path, not only on success */
    ajStrDel(&convert);
    ajStrDel(&jobid);
    ajRegFree(&regexp);

    return ret;
}
/* @func gHttpGetBinS *********************************************************
**
** Downloads the body of an HTTP resource byte by byte into an output file,
** following redirects reported by gHttpRedirect().
**
** On success the output file is closed by this function. On failure the
** output file is left untouched (and open).
**
** @param [r] url [AjPStr] URL to fetch
** @param [u] outf [AjPFile*] Destination file; closed on success
** @return [AjBool] ajTrue on success, ajFalse when the connection could
**                  not be opened
** @@
******************************************************************************/

AjBool gHttpGetBinS(AjPStr url, AjPFile* outf)
{
    AjPFile      file = NULL;
    AjPStr       line = NULL;
    AjPStr       host = NULL;
    AjPStr       path = NULL;
    AjPStr       get  = NULL;
    ajint        port = 80;
    FILE        *fp   = NULL;
    AjPRegexp    crlf = NULL;
    char         buf[8];
    AjOSysSocket sock;
    AjBool       ret  = ajFalse;

    get = ajStrNew();

    ajHttpUrlDeconstruct(url, &port, &host, &path);

    /* First pass opens the connection; later passes follow redirects */
    while(file == NULL || gHttpRedirect(file, &host, &port, &path))
    {
        /* The connection that answered with a redirect is finished with */
        if(file)
            ajFileClose(&file);

        if(ajStrGetCharFirst(path) != '/')
            ajStrInsertK(&path, 0, '/');

        ajFmtPrintS(&get, "GET http://%S:%d%S HTTP/1.1\r\n", host, port, path);

        fp = ajHttpOpen(NULL, host, port, get, &sock);

        /* Never hand a NULL stream to ajFileNewFromCfile() */
        if(!fp)
            goto cleanup;

        file = ajFileNewFromCfile(fp);

        if(!file)
            goto cleanup;
    }

    /* Skip the remaining header lines up to the blank separator line */
    crlf = ajRegCompC("^\r?\n$");

    while(ajReadline(file, &line))
    {
        if(ajRegExec(crlf, line))
            break;
    }

    /* Copy the body one byte at a time */
    while(ajReadbinBinary(file, 1, 1, buf))
    {
        ajWritebinBinary(*outf, 1, 1, buf);
    }

    ajFileClose(outf);
    ret = ajTrue;

cleanup:
    if(file)
        ajFileClose(&file);

    ajRegFree(&crlf);
    ajStrDel(&line);
    ajStrDel(&get);
    ajStrDel(&host);
    ajStrDel(&path);

    return ret;
}
/* @funcstatic dbiblast_parseId ***********************************************
**
** Extracts the entry ID from an ID line: the leading run of non-space
** characters, converted to upper case.
**
** Calling with fields == NULL only resets the internal state and returns
** ajFalse. No index fields are written by this parser; the field-related
** parameters are accepted but unused.
**
** @param [r] line [const AjPStr] Input line
** @param [u] alistfile [AjPFile*] Unused
** @param [r] systemsort [AjBool] Unused
** @param [r] fields [AjPStr const*] Field names, or NULL to reset
** @param [w] maxFieldLen [ajint*] Unused
** @param [w] countfield [ajuint*] Unused
** @param [w] myid [AjPStr*] Receives the upper-cased ID
** @param [u] myfdl [AjPList*] Unused
** @return [AjBool] ajTrue when an ID was found
** @@
******************************************************************************/

static AjBool dbiblast_parseId(const AjPStr line, AjPFile * alistfile,
                               AjBool systemsort, AjPStr const * fields,
                               ajint* maxFieldLen, ajuint* countfield,
                               AjPStr* myid, AjPList * myfdl)
{
    static AjPRegexp firstwordexp = NULL;
    static AjBool    needreset    = AJTRUE;

    (void) alistfile;
    (void) systemsort;
    (void) maxFieldLen;
    (void) countfield;
    (void) myfdl;

    if(!fields)
    {
        needreset = ajTrue;

        return ajFalse;
    }

    /* Nothing to initialise for this parser; just note that we ran */
    needreset = ajFalse;

    if(!firstwordexp)
        firstwordexp = ajRegCompC("^([^ ]+)");

    if(ajRegExec(firstwordexp, line))
    {
        ajRegSubI(firstwordexp, 1, myid);
        ajStrFmtUpper(myid);
        ajDebug("parseId '%S'\n", *myid);

        return ajTrue;
    }

    return ajFalse;
}
AjPPatlistSeq ajPatlistSeqRead (const AjPStr patspec, const AjPStr patname, const AjPStr fmt, AjBool protein, ajuint mismatches) { AjPPatlistSeq patlist = NULL; AjPStr line = NULL; AjPStr name = NULL; AjPFilebuff infile = NULL; AjPRegexp mismreg = NULL; AjPStr patstr = NULL; AjPStr pat = NULL; ajuint mismatch = 0; ajint ifmt = 0; ajuint npat = 0; AjPStr namestr = NULL; ajStrAssignS(&namestr, patname); ajStrAssignEmptyC(&namestr, "pattern"); ajStrAssignS(&patstr, patspec); patlist = ajPatlistSeqNewType(protein); ifmt = patternSeqFormat(fmt); ajDebug("ajPatlistSeqRead patspec: '%S' patname: '%S' " "protein: %B mismatches: %d\n", patspec, patname, protein, mismatches); if(ajStrGetCharFirst(patstr) == '@') { ajStrCutStart(&patstr, 1); infile = ajFilebuffNewNameS(patstr); if(!infile) { ajErr("Unable to open pattern file '%S'", patstr); return NULL; } line = ajStrNew(); name = ajStrNew(); if(!ifmt) { ajBuffreadLineTrim(infile,&line); if(ajStrPrefixC(line, ">")) ifmt = 2; else ifmt = 1; ajFilebuffReset(infile); } switch(ifmt) { case 1: while (ajBuffreadLineTrim(infile,&line)) { npat++; ajStrAppendS (&pat,line); ajFmtPrintS(&name, "%S%u", namestr, npat); ajPatternSeqNewList(patlist,name,pat,mismatches); ajStrSetClear(&pat); } break; default: mismreg = ajRegCompC("<mismatch=(\\d+)>"); while (ajBuffreadLineTrim(infile,&line)) { if (ajStrGetCharFirst(line) == '>') { if (ajStrGetLen(name)) { ajPatternSeqNewList(patlist,name,pat, mismatch); ajStrSetClear(&name); ajStrSetClear(&pat); mismatch=mismatches; } ajStrCutStart(&line,1); if (ajRegExec(mismreg,line)) { ajRegSubI(mismreg,1,&name); ajStrToUint(name,&mismatch); ajStrTruncateLen(&line,ajRegOffset(mismreg)); ajStrTrimWhiteEnd(&line); } ajStrAssignS (&name,line); ajStrAssignEmptyS(&name, patname); } else ajStrAppendS (&pat,line); } ajStrAssignEmptyS(&name, patname); ajPatternSeqNewList(patlist,name,pat,mismatch); ajRegFree(&mismreg); break; } ajFilebuffDel(&infile); } else { ajStrAssignS(&name, namestr); 
ajPatternSeqNewList(patlist,name,patstr,mismatches); } ajStrDel(&name); ajStrDel(&line); ajStrDel(&pat); ajStrDel(&namestr); ajStrDel(&patstr); return patlist; }
/* @func gHttpRedirect ********************************************************
**
** Reads the status line of an HTTP reply and, for status 301, 302 or 307,
** scans the following header lines for a "Location:" entry.
**
** When a location is found it is split with ajHttpUrlDeconstruct(); port
** and path are updated from it, and host is replaced if the new URL
** carries a host name.
**
** @param [u] file [AjPFile] Open reply stream, positioned at the status line
** @param [w] host [AjPStr*] Host name, updated on redirect
** @param [w] port [ajint*] Port, updated on redirect
** @param [w] path [AjPStr*] Path, updated on redirect
** @return [AjBool] ajTrue when a redirect location was found
** @@
******************************************************************************/

AjBool gHttpRedirect(AjPFile file, AjPStr* host, ajint* port, AjPStr* path)
{
    AjPRegexp statusexp   = NULL;
    AjPRegexp blankexp    = NULL;
    AjPRegexp locationexp = NULL;
    AjPStr    status      = NULL;
    AjPStr    target      = NULL;
    AjPStr    targethost  = NULL;
    AjPStr    hdrline     = NULL;
    ajuint    code        = 0;
    AjBool    redirected  = ajFalse;

    statusexp = ajRegCompC("^HTTP/\\S+\\s+(\\d+)");

    ajReadline(file, &hdrline);
    ajDebug("gHttpRedirect: First line: '%S'\n", hdrline);

    /* Not an HTTP status line: nothing to follow */
    if(!ajRegExec(statusexp, hdrline))
    {
        ajRegFree(&statusexp);
        ajStrDel(&hdrline);

        return ajFalse;
    }

    ajRegSubI(statusexp, 1, &status);
    ajStrToUint(status, &code);
    ajDebug("Header: codestr '%S' code '%u'\n", status, code);
    ajStrDel(&status);

    switch(code)
    {
        case 301:
        case 302:
        case 307:
            locationexp = ajRegCompC("^Location: (\\S+)");
            blankexp    = ajRegCompC("^\r?\n?$");

            for(;;)
            {
                /* Stop at end of input or at the end of the headers */
                if(!ajReadline(file, &hdrline))
                    break;

                if(ajRegExec(blankexp, hdrline))
                    break;

                ajDebug("gHttpRedirect: header line: '%S'\n", hdrline);

                if(!ajRegExec(locationexp, hdrline))
                    continue;

                ajRegSubI(locationexp, 1, &target);
                ajHttpUrlDeconstruct(target, port, &targethost, path);

                if(ajStrGetLen(targethost))
                    ajStrAssignS(host, targethost);

                ajStrDel(&target);
                ajStrDel(&targethost);
                redirected = ajTrue;

                break;
            }

            ajRegFree(&locationexp);
            ajRegFree(&blankexp);

            break;

        default:
            break;
    }

    ajRegFree(&statusexp);
    ajStrDel(&hdrline);

    return redirected;
}
static AjBool dbiblast_parseSimple(const AjPStr line, AjPFile * alistfile, AjBool systemsort, AjPStr const * fields, ajint* maxFieldLen, ajuint* countfield, AjPStr* myid, AjPList* myfdl) { static AjPRegexp idexp = NULL; static AjPStr mytmpac = NULL; char* ac; static ajint numFields; static ajint accfield = -1; static AjBool reset = AJTRUE; if(!fields) { reset = ajTrue; accfield = -1; return ajFalse; } if(reset) { numFields = 0; while(fields[numFields]) { if(ajStrMatchCaseC(fields[numFields], "acc")) accfield=numFields; else if(!ajStrMatchCaseC(fields[numFields], "sv") && !ajStrMatchCaseC(fields[numFields], "des")) ajWarn("Simple ID parsing unknown field '%S' ignored", fields[numFields]); numFields++; } reset = ajFalse; } if(!idexp) idexp = ajRegCompC("^([^ ]+)( +([A-Za-z][A-Za-z0-9]+[0-9]))"); if(!ajRegExec(idexp, line)) return ajFalse; ajRegSubI(idexp, 1, myid); ajRegSubI(idexp, 3, &mytmpac); ajStrFmtUpper(myid); ajStrFmtUpper(&mytmpac); /* GCG mixes case on new SwissProt acnums */ if(accfield >= 0) { embDbiMaxlen(&mytmpac, &maxFieldLen[accfield]); countfield[accfield]++; if(systemsort) ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, mytmpac); else { ac = ajCharNewS(mytmpac); ajListPushAppend(myfdl[accfield], ac); } } ajDebug("parseSimple '%S' '%S'\n", *myid, mytmpac); return ajTrue; }
static AjBool dbiblast_parseNcbi(const AjPStr line, AjPFile * alistfile, AjBool systemsort, AjPStr const * fields, ajint* maxFieldLen, ajuint* countfield, AjPStr* myid, AjPList* fdlist) { char* fd; static ajint numFields; static ajint accfield = -1; static ajint desfield = -1; static ajint svnfield = -1; static AjBool reset = AJTRUE; if(!fields) { reset = ajTrue; accfield = svnfield = desfield = -1; return ajFalse; } if(reset) { numFields = 0; while(fields[numFields]) { if(ajStrMatchCaseC(fields[numFields], "acc")) accfield=numFields; else if(ajStrMatchCaseC(fields[numFields], "sv")) svnfield=numFields; else if(ajStrMatchCaseC(fields[numFields], "des")) desfield=numFields; else ajWarn("EMBL parsing unknown field '%S' ignored", fields[numFields]); numFields++; } reset = ajFalse; } if(!wrdexp) wrdexp = ajRegCompC("([A-Za-z0-9]+)"); ajStrAssignC(&tmpdes,""); ajStrAssignC(&t,""); ajStrAssignC(&tmpac,""); ajStrAssignC(&tmpsv,""); ajStrAssignC(&tmpgi,""); ajStrAssignC(&tmpdb,""); ajFmtPrintS(&t,">%S",line); if(!ajSeqParseNcbi(t,myid,&tmpac,&tmpsv,&tmpgi,&tmpdb,&tmpdes)) return ajFalse; if(ajStrGetLen(tmpac)) ajStrFmtUpper(&tmpac); if(accfield >= 0) embDbiMaxlen(&tmpac, &maxFieldLen[accfield]); if(svnfield >= 0) { embDbiMaxlen(&tmpsv, &maxFieldLen[svnfield]); embDbiMaxlen(&tmpgi, &maxFieldLen[svnfield]); } ajStrFmtUpper(myid); /* ajDebug("parseNCBI success\n"); */ if(systemsort) { if(accfield >= 0 && ajStrGetLen(tmpac)) { countfield[accfield]++; ajFmtPrintF(alistfile[accfield], "%S %S\n", *myid, tmpac); } if(svnfield >= 0 && ajStrGetLen(tmpsv)) { countfield[svnfield]++; ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpsv); } if(svnfield >= 0 && ajStrGetLen(tmpgi)) { countfield[svnfield]++; ajFmtPrintF(alistfile[svnfield], "%S %S\n", *myid, tmpgi); } if(desfield >= 0 && ajStrGetLen(tmpdes)) while(ajRegExec(wrdexp, tmpdes)) { ajRegSubI(wrdexp, 1, &tmpfd); embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]); ajStrFmtUpper(&tmpfd); ajDebug("++des '%S'\n", tmpfd); 
countfield[desfield]++; ajFmtPrintF(alistfile[desfield], "%S %S\n", *myid, tmpfd); ajRegPost(wrdexp, &tmpdes); } } else { if(accfield >= 0 && ajStrGetLen(tmpac)) { fd = ajCharNewS(tmpac); countfield[accfield]++; ajListPushAppend(fdlist[accfield], fd); } if(svnfield >= 0 && ajStrGetLen(tmpsv)) { fd = ajCharNewS(tmpsv); countfield[svnfield]++; ajListPushAppend(fdlist[svnfield], fd); } if(svnfield >= 0 && ajStrGetLen(tmpgi)) { fd = ajCharNewS(tmpgi); ajListPushAppend(fdlist[svnfield], fd); } if(desfield >= 0 && ajStrGetLen(tmpdes)) { while(ajRegExec(wrdexp, tmpdes)) { ajRegSubI(wrdexp, 1, &tmpfd); embDbiMaxlen(&tmpfd, &maxFieldLen[desfield]); ajStrFmtUpper(&tmpfd); ajDebug("++des '%S'\n", tmpfd); fd = ajCharNewS(tmpfd); countfield[desfield]++; ajListPushAppend(fdlist[desfield], fd); ajRegPost(wrdexp, &tmpdes); } } } /* ajDebug("parseNCBI '%S' '%S'\n", *myid, tmpac); */ return ajTrue; }
static AjBool dbifasta_ParseFasta(AjPFile libr, ajint* dpos, ajint* maxFieldLen, ajuint* countfield, AjPRegexp idexp, ajuint usertype, AjPFile* alistfile, AjBool systemsort, AjPStr const * fields) { char* fd; ajlong ipos; static AjPStr tstr = NULL; static ajint numFields; static ajint accfield = -1; static ajint desfield = -1; static ajint svnfield = -1; static AjBool reset = AJTRUE; ajuint type = usertype; if(!fields) { reset = ajTrue; accfield = svnfield = desfield = -1; return ajFalse; } if(reset) { numFields = 0; while(fields[numFields]) { if(ajStrMatchCaseC(fields[numFields], "acc")) accfield=numFields; else if(ajStrMatchCaseC(fields[numFields], "sv")) svnfield=numFields; else if(ajStrMatchCaseC(fields[numFields], "des")) desfield=numFields; else ajWarn("EMBL parsing unknown field '%S' ignored", fields[numFields]); numFields++; } reset = ajFalse; } if(!dbifastaGWrdexp) dbifastaGWrdexp = ajRegCompC("([A-Za-z0-9]+)"); if(!tstr) tstr = ajStrNew(); *dpos = (ajint) ajFileResetPos(libr); /* Lossy cast */ ajReadline(libr, &dbifastaGRline); if(!ajStrGetLen(dbifastaGRline)) return ajFalse; if(!ajRegExec(idexp,dbifastaGRline)) { ajStrDelStatic(&dbifastaGTmpAc); type = FASTATYPE_SIMPLE; idexp = dbifastaGIdexp; if(!ajRegExec(idexp, dbifastaGRline)) { ajFatal("Unrecognised ID line format: %S", dbifastaGRline); return ajFalse; } ajWarn("Invalid ID line for selected format: %S", dbifastaGRline); } /* ** each case needs to set id, tmpac, tmpsv, tmpdes ** using empty values if they are not found */ ajStrAssignC(&dbifastaGTmpSv, ""); ajStrAssignC(&dbifastaGTmpGi, ""); ajStrAssignC(&dbifastaGTmpDb, ""); ajStrAssignC(&dbifastaGTmpDes, ""); ajStrAssignC(&dbifastaGTmpAc, ""); ajStrAssignC(&dbifastaGTmpId, ""); switch(type) { case FASTATYPE_SIMPLE: ajRegSubI(idexp,2,&dbifastaGTmpId); ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId); ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_DBID: ajRegSubI(idexp,1,&dbifastaGTmpId); ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId); 
ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_GCGID: ajRegSubI(idexp,1,&dbifastaGTmpId); ajStrAssignS(&dbifastaGTmpAc,dbifastaGTmpId); ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_NCBI: if(!ajSeqParseNcbi(dbifastaGRline, &dbifastaGTmpId, &dbifastaGTmpAc, &dbifastaGTmpSv, &dbifastaGTmpGi, &dbifastaGTmpDb, &dbifastaGTmpDes)) { ajStrDelStatic(&dbifastaGTmpAc); return ajFalse; } break; case FASTATYPE_GCGIDACC: ajRegSubI(idexp,1,&dbifastaGTmpId); ajRegSubI(idexp,2,&dbifastaGTmpAc); ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_GCGACCID: ajRegSubI(idexp,1,&dbifastaGTmpAc); ajRegSubI(idexp,2,&dbifastaGTmpId); ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_IDACC: ajRegSubI(idexp,1,&dbifastaGTmpId); ajRegSubI(idexp,2,&dbifastaGTmpAc); ajRegPost(idexp, &dbifastaGTmpDes); break; case FASTATYPE_ACCID: ajRegSubI(idexp,1,&dbifastaGTmpAc); ajRegSubI(idexp,2,&dbifastaGTmpId); ajRegPost(idexp, &dbifastaGTmpDes); break; default: ajStrDelStatic(&dbifastaGTmpAc); return ajFalse; } ajStrFmtUpper(&dbifastaGTmpId); ajStrFmtUpper(&dbifastaGTmpAc); if(accfield >= 0) embDbiMaxlen(&dbifastaGTmpAc, &maxFieldLen[accfield]); if(svnfield >= 0) { embDbiMaxlen(&dbifastaGTmpSv, &maxFieldLen[svnfield]); embDbiMaxlen(&dbifastaGTmpGi, &maxFieldLen[svnfield]); } if(systemsort) { if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc)) { countfield[accfield]++; ajFmtPrintF(alistfile[accfield], "%S %S\n", dbifastaGTmpId, dbifastaGTmpAc); } if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv)) { countfield[svnfield]++; ajFmtPrintF(alistfile[svnfield], "%S %S\n", dbifastaGTmpId, dbifastaGTmpSv); } if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi)) { countfield[svnfield]++; ajFmtPrintF(alistfile[svnfield], "%S %S\n", dbifastaGTmpId, dbifastaGTmpGi); } if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes)) while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes)) { ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd); embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]); 
ajStrFmtUpper(&dbifastaGTmpFd); ajDebug("++des '%S' tmpdes '%S\n", dbifastaGTmpFd, dbifastaGTmpDes); countfield[desfield]++; ajFmtPrintF(alistfile[desfield], "%S %S\n", dbifastaGTmpId, dbifastaGTmpFd); ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes); } } else { if(accfield >= 0 && ajStrGetLen(dbifastaGTmpAc)) { fd = ajCharNewS(dbifastaGTmpAc); ajListPushAppend(dbifastaGFdl[accfield],fd); countfield[accfield]++; } if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpSv)) { fd = ajCharNewS(dbifastaGTmpSv); ajListPushAppend(dbifastaGFdl[svnfield], fd); countfield[svnfield]++; } if(svnfield >= 0 && ajStrGetLen(dbifastaGTmpGi)) { fd = ajCharNewS(dbifastaGTmpGi); ajListPushAppend(dbifastaGFdl[svnfield], fd); countfield[svnfield]++; } if(desfield >= 0 && ajStrGetLen(dbifastaGTmpDes)) while(ajRegExec(dbifastaGWrdexp, dbifastaGTmpDes)) { ajRegSubI(dbifastaGWrdexp, 1, &dbifastaGTmpFd); embDbiMaxlen(&dbifastaGTmpFd, &maxFieldLen[desfield]); ajStrFmtUpper(&dbifastaGTmpFd); ajDebug("++des '%S' tmpdes: '%S'\n", dbifastaGTmpFd, dbifastaGTmpDes); fd = ajCharNewS(dbifastaGTmpFd); ajListPushAppend(dbifastaGFdl[desfield], fd); countfield[desfield]++; ajRegPost(dbifastaGWrdexp, &dbifastaGTmpDes); } } ipos = ajFileResetPos(libr); while(ajReadline(libr, &dbifastaGRline)) { if(ajStrGetCharFirst(dbifastaGRline) == '>') { ajFileSeek(libr, ipos, 0); return ajTrue; } ipos = ajFileResetPos(libr); } ajFileSeek(libr, ipos, 0); /* end of file reached */ return ajTrue; }
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); 
ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { 
ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
/* @func embPatternRegexSearch ************************************************
**
** Searches a sequence with a compiled regular expression pattern and adds
** one feature to the feature table for every match.
**
** The selected range of the sequence is upper-cased and scanned
** repeatedly; after each match the search restarts one position past the
** start of that match, so overlapping matches are reported. Each feature
** is tagged with the pattern name and pattern text.
**
** Uses the feature type strings featMotifProt and featMotifNuc, which are
** declared outside this function and set here on first use.
**
** @param [u] ftable [AjPFeattable] Feature table receiving the matches
** @param [r] seq [const AjPSeq] Sequence to search
** @param [r] pat [const AjPPatternRegex] Pattern to search for
** @param [r] reverse [AjBool] Report matches as '-' strand features with
**                             coordinates counted back from the range end
** @return [void]
** @@
******************************************************************************/

void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
                            const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr    = NULL;
    AjPStr tmp       = NULL;
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq    = NULL;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);

    /* Nothing allocated yet, so an empty sequence can return directly */
    if(!seqlen)
        return;

    tmp = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    /*
    ** NOTE(review): this assignment is unconditional, so the string built
    ** in the reverse branch above is overwritten before it is searched.
    ** Left as found because changing it alters results; confirm whether
    ** the forward string is really intended for reverse searches.
    */
    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
        off = ajRegOffset(patexp);
        len = ajRegLenI(patexp, 0);

        if(off || len)
        {
            /* Drop everything before the match; keep match + remainder */
            ajRegSubI(patexp, 0, &substr);
            ajRegPost(patexp, &tmp);
            ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
            pos += off;

            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               adj - pos - len + 2,
                               adj - pos + 1,
                               0.0, '-', 0);
            else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }

            if(isreversed)
                ajFeatReverse(sf, seqlen);

            ajFmtPrintS (&tmpstr,"*pat %S: %S",
                         ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
            ajFeatTagAdd (sf,NULL,tmpstr);
        }

        /* Always advance by one so an empty match cannot loop forever */
        pos += 1;
        ajStrCutStart(&seqstr, 1);
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
        ajSeqDel(&revseq);

    return;
}
static void stssearch_primTest(void **x,void *cl) { Primer* p; Primer primdata; AjBool testa; AjBool testb; AjBool testc; AjBool testd; ajint ioff; (void) cl; /* make it used */ p = (Primer*) x; primdata = *p; ntests++; if(!(ntests % 1000)) ajDebug("completed tests: %d\n", ntests); testa = ajRegExec(primdata->Prima, seqstr); if(testa) { ioff = ajRegOffset(primdata->Prima); ajDebug("%s: %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Prima); } testb = ajRegExec(primdata->Primb, seqstr); if(testb) { ioff = ajRegOffset(primdata->Primb); ajDebug("%s: %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Primb); } testc = ajRegExec(primdata->Prima, revstr); if(testc) { ioff = ajStrGetLen(seqstr) - ajRegOffset(primdata->Prima); ajDebug("%s: (rev) %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: (rev) %S PrimerA matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Prima); } testd = ajRegExec(primdata->Primb, revstr); if(testd) { ioff = ajStrGetLen(seqstr) - ajRegOffset(primdata->Primb); ajDebug("%s: (rev) %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajFmtPrintF(out, "%s: (rev) %S PrimerB matched at %d\n", ajSeqGetNameC(seq), primdata->Name, ioff); ajRegTrace(primdata->Primb); } return; }
/* @funcstatic domainalign_stamp ********************************************** ** ** Call STAMP and process files. ** ** @param [r] prevdomain [AjPDomain] Previous domain. ** @param [r] domain [AjPDomain] This domain. ** @param [r] daf [AjPDirout] Domain alignment files. ** @param [r] super [AjPDirout] Superimposition files. ** @param [r] singlets [AjPDirout] Singlet files. ** @param [r] align [AjPStr] Align. ** @param [r] alignc [AjPStr] Alignc. ** @param [r] dom [AjPStr] Dom. ** @param [r] name [AjPStr] Name. ** @param [r] set [AjPStr] Name of set file. ** @param [r] scan [AjPStr] Name of scan file. ** @param [r] sort [AjPStr] Name of sort file. ** @param [r] log [AjPStr] Log file name. ** @param [r] out [AjPStr] Out file name. ** @param [r] keepsinglets [AjBool] Keep singlet sequences or not. ** @param [r] moden [ajint] Mode number. ** @param [r] noden [ajint] Node number. ** @param [r] nset [ajint] Number in set. ** @param [r] logf [AjPFile] Lof file. ** ** ** @return [void] True on success ** @@ ****************************************************************************/ static void domainalign_stamp(AjPDomain prevdomain, AjPDomain domain, AjPDirout daf, AjPDirout super, AjPDirout singlets, AjPStr align, AjPStr alignc, AjPStr dom, AjPStr name, AjPStr set, AjPStr scan, AjPStr sort, AjPStr log, AjPStr out, AjBool keepsinglets, ajint moden, ajint noden, ajint nset, AjPFile logf) { AjPStr exec = NULL; /* The UNIX command line to be executed. */ AjPFile clusterf = NULL; /* File pointer for log file. */ ajint ncluster = 0; /* Counter for the number of clusters. */ AjPStr line = NULL; /* Holds a line from the log file. */ AjPRegexp rexp = NULL; /* For parsing no. of clusters in log file */ AjPStr temp = NULL; /* A temporary string. */ ajint x = 0; /* Loop counter. */ exec = ajStrNew(); line = ajStrNew(); temp = ajStrNew(); rexp = ajRegCompC("^(Cluster:)"); ajDebug("domainalign_stamp name: '%S'\n", name); /* Call STAMP. 
*/ ajFmtPrintS(&exec, "%S -l %S -s -n 2 -slide 5 -prefix %S -d %S", ajAcdGetpathC("stamp"), dom, name, set); ajFmtPrint("\n%S\n\n", exec); ajSysExecS(exec); ajFmtPrintS(&exec, "%S -f %S -s Sc 2.5", ajAcdGetpathC("sorttrans"), scan); ajFmtPrint("\n%S > %S\n\n", exec, sort); ajSysExecOutnameS(exec, sort); ajFmtPrintS(&exec, "%S -l %S -prefix %S", ajAcdGetpathC("stamp"), sort, name); ajFmtPrint("\n%S > %S\n\n", exec, log); ajSysExecOutnameS(exec, log); ajFmtPrintS(&exec, "%S -f %S -g -o %S", ajAcdGetpathC("transform"), sort, alignc); ajFmtPrint("\n%S\n\n", exec); ajSysExecS(exec); /* Count the number of clusters in the log file. */ if(!(clusterf=ajFileNewInNameS(log))) ajFatal("Could not open log file '%S'\n", log); ncluster=0; while(ajReadlineTrim(clusterf,&line)) if(ajRegExec(rexp,line)) ncluster++; ajFileClose(&clusterf); ajDebug("ncluster: %d\n", ncluster); /* Call STAMP ... calculate two fields for structural equivalence using threshold Pij value of 0.5, see stamp manual v4.1 pg 27. */ ajFmtPrintS(&exec,"%S -f %S.%d -min 0.5", ajAcdGetpathC("poststamp"), name, ncluster); ajFmtPrint("%S\n\n", exec); ajSysExecS(exec); /* Call STAMP ... convert block format alignment into clustal format. */ ajFmtPrintS(&exec,"%S -f %S.%d.post", ajAcdGetpathC("ver2hor"), name, ncluster); ajFmtPrint("%S > %S\n\n", exec, out); ajSysExecOutnameS(exec, out); /* Process STAMP alignment file and generate alignment file for output. */ domainalign_ProcessStampFile(out, align, prevdomain, noden, logf); /* Remove all temporary files. */ for(x=1;x<ncluster+1;x++) { ajFmtPrintS(&temp, "%S.%d", name, x); ajSysFileUnlinkS(temp); } ajFmtPrintS(&temp, "%S.%d.post", name, ncluster); ajSysFileUnlinkS(temp); ajStrDel(&exec); ajStrDel(&line); ajStrDel(&temp); ajRegFree(&rexp); return; }
/* @funcstatic dbxflat_ParseFastq *********************************************
**
** Reads one FASTQ record from the input file and fills in the index entry.
**
** The record must start with an '@' line. The first word of that line
** becomes the entry ID; when a description field is being indexed, each
** word of the rest of the line is pushed onto that field's data list.
** Sequence lines are then read up to the '+' line, followed by quality
** lines until at least as many quality characters as sequence characters
** have been read (or the input ends).
**
** Uses dbxflat_wrdexp and desfield, which are declared outside this
** function.
**
** @param [w] entry [EmbPBtreeEntry] Entry receiving file position and ID
** @param [u] inf [AjPFile] Input file positioned at the start of a record
** @return [AjBool] ajTrue when a record was read; ajFalse at end of input,
**                  when the line does not start with '@', or when no '+'
**                  line follows the sequence
** @@
******************************************************************************/

static AjBool dbxflat_ParseFastq(EmbPBtreeEntry entry, AjPFile inf)
{
    AjPStr line = NULL;
    ajlong pos  = 0L;
    ajuint seqlen = 0;
    ajuint qlen = 0;
    AjPStr tmpfd = NULL;
    AjPStr str = NULL;
    AjPStr de = NULL;
    AjBool ok;
    AjBool ret = ajFalse;

    if(!dbxflat_wrdexp)
        dbxflat_wrdexp = ajRegCompC("([A-Za-z0-9.:=]+)");

    line = ajStrNewC("");

    pos = ajFileResetPos(inf);

    if(!ajReadlineTrim(inf,&line))
        goto cleanup;

    /* first line of entry */

    if(!ajStrPrefixC(line,"@"))
        goto cleanup;

    entry->fpos = pos;
    ajStrCutStart(&line, 1);
    ajStrExtractFirst(line, &de, &entry->id);

    if(desfield && ajStrGetLen(de))
    {
        /* ajRegPost trims each matched word off the description */
        while(ajRegExec(dbxflat_wrdexp,de))
        {
            ajRegSubI(dbxflat_wrdexp, 1, &tmpfd);
            str = ajStrNew();
            ajStrAssignS(&str,tmpfd);
            ajListPush(desfield->data,(void *)str);
            ajRegPost(dbxflat_wrdexp, &de);
        }
    }

    /* now read sequence */
    ok = ajReadlineTrim(inf,&line);

    while(ok && !ajStrPrefixC(line, "+"))
    {
        ajStrRemoveWhite(&line);
        seqlen += MAJSTRGETLEN(line);
        ok = ajReadlineTrim(inf,&line);
    }

    if(!ok)
        goto cleanup;

    /* quality lines: stop once they cover the sequence length */
    ok = ajReadlineTrim(inf,&line);

    while(ok)
    {
        qlen += MAJSTRGETLEN(line);

        if(qlen < seqlen)
            ok = ajReadlineTrim(inf,&line);
        else
            ok = ajFalse;
    }

    ret = ajTrue;

cleanup:
    /* Shared exit so the scratch strings are released on every path */
    ajStrDel(&de);
    ajStrDel(&tmpfd);
    ajStrDel(&line);

    return ret;
}
int main(int argc, char *argv[]) { embInitPV("greporiter", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq; AjPStr inseq = NULL; AjBool accid = ajFalse; AjPStr restid = NULL; AjPStr seqid = NULL; AjPStr base = NULL; AjPStr url = NULL; AjBool oriloc = 0; AjBool gcskew = 0; AjBool dbonly = 0; ajint difthreshold = 0; AjPFile outf = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr fstname = NULL; AjPFilebuff tmp = NULL; AjPStr line = NULL; AjPSeqout tmpout = NULL; AjPRegexp regex; AjPStr ori = NULL; AjPStr ter = NULL; seqall = ajAcdGetSeqall("sequence"); difthreshold = ajAcdGetInt("difthreshold"); oriloc = ajAcdGetBoolean("oriloc"); gcskew = ajAcdGetBoolean("gcskew"); dbonly = ajAcdGetBoolean("dbonly"); accid = ajAcdGetBoolean("accid"); outf = ajAcdGetOutfile("outfile"); base = ajStrNewC("rest.g-language.org"); gAssignUniqueName(&tmpname); gAssignUniqueName(&fstname); ajStrAppendC(&fstname, ".fasta"); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); tmpout = ajSeqoutNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { if(!ajSeqoutOpenFilename(tmpout, fstname)) { embExitBad(); } ajSeqoutSetFormatS(tmpout,ajStrNewC("fasta")); ajSeqoutWriteSeq(tmpout, seq); ajSeqoutClose(tmpout); ajSeqoutDel(&tmpout); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, fstname, &restid); ajStrDel(&url); ajSysFileUnlinkS(fstname); } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); 
} if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); ajFmtPrintS(&url, "http://%S/%S/rep_ori_ter/oriloc=%d/gcskew=%d/" "difthreshold=%d/dbonly=%d/", base, restid, oriloc, gcskew, difthreshold, dbonly); if(!gFilebuffURLS(url, &tmp)) { ajDie("Failed to download result from:\n%S\n", url); } ajBuffreadLine(tmp, &line); regex = ajRegCompC("([0-9]+),([0-9]+)"); if(ajRegExec(regex, line)) { if(ajRegSubI(regex, 1, &ori), ajRegSubI(regex, 2, &ter)) { ajFmtPrint("%S Origin: %S Terminus %S\n", seqid, ori, ter); } } ajStrDel(&url); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajFileClose(&outf); ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&base); embExit(); return 0; }