static void jaspscan_ParseInput(const AjPStr dir, const AjPStr jaspdir, const AjPStr mats, const AjPStr excl, ajuint *recurs, AjPList ret) { ajuint nm = 0; ajuint ne = 0; AjPStr *carr = NULL; AjPStr *earr = NULL; AjPFile inf = NULL; AjPStr line = NULL; AjPStr comm = NULL; AjPStr val = NULL; ajuint i; ajuint j; char c; ajuint rlen = 0; if(*recurs > JASPSCAN_RECURS) ajFatal("Too many recursion levels in matrix list files"); line = ajStrNew(); comm = ajStrNew(); if(mats) { nm = ajArrCommaList(mats,&carr); for(i=0; i < nm; ++i) { if(ajStrGetCharFirst(carr[i]) != '@') ajStrFmtUpper(&carr[i]); if(ajStrMatchC(carr[i],"ALL")) { jaspscan_GetFileList(dir, jaspdir, "*", ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } else if(ajStrGetCharFirst(carr[i]) == '@') { ajStrTrimStartC(&carr[i],"@"); inf = ajFileNewInNameS(carr[i]); if(!inf) ajFatal("Cannot open list file %S",carr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; jaspscan_ParseInput(dir,jaspdir,comm,NULL,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { jaspscan_GetFileList(dir,jaspdir,ajStrGetPtr(carr[i]),ret); ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); } } for(i=0; i < nm; ++i) ajStrDel(&carr[i]); AJFREE(carr); } if(excl) { ne = ajArrCommaList(excl,&earr); for(i=0; i < ne; ++i) { if(ajStrGetCharFirst(earr[i]) != '@') ajStrFmtUpper(&earr[i]); if(ajStrGetCharFirst(earr[i]) == '@') { ajStrTrimStartC(&earr[i],"@"); inf = ajFileNewInNameS(earr[i]); if(!inf) ajFatal("Cannot open list file %S",earr[i]); while(ajReadlineTrim(inf,&line)) { ajStrRemoveWhite(&line); c = ajStrGetCharFirst(line); if(c == '#' || c== '!') continue; if(ajStrGetLen(comm)) ajStrAppendC(&comm,","); ajStrFmtUpper(&line); ajStrAppendS(&comm,line); } *recurs += 1; jaspscan_ParseInput(dir,jaspdir,NULL,comm,recurs,ret); *recurs -= 1; ajListSortUnique(ret, ajStrVcmp, jaspscan_strdel); ajFileClose(&inf); } else { ajStrAssignS(&line,earr[i]); ajStrAppendC(&line,J_EXT); rlen = ajListGetLength(ret); for(j=0; j < rlen; ++j) { ajListPop(ret,(void **)&val); if(ajStrSuffixS(val,line)) ajStrDel(&val); else ajListPushAppend(ret,(void *)val); } } } for(i=0; i < ne; ++i) ajStrDel(&earr[i]); AJFREE(earr); } ajStrDel(&line); ajStrDel(&comm); return; }
int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }