/*
** dbiblast_ncblreadhdr
**
** Reads one header record from the database header file (db->HFile)
** into hdrline.
**
** The record length is end-start (plus one for BLAST 2 databases),
** capped at the space already reserved in hdrline. When end is zero the
** reserved size of hdrline is used as the length instead.
**
** For databases that are not BLAST 2 the read starts one byte after
** 'start' so that the leading '>' is skipped.
**
** hdrline  string to fill; its reserved size limits the read
** db       open database; HFile and IsBlast2 are used
** start    offset of the record in the header file
** end      offset of the end of the record, or 0 if unknown
**
** Returns the computed record length. The read request and the valid
** length set on hdrline are both one less than this value.
*/
static ajint dbiblast_ncblreadhdr(AjPStr* hdrline, PBlastDb db,
                                  ajint start, ajint end)
{
    PMemFile hdrfile = db->HFile;
    ajint capacity = ajStrGetRes(*hdrline);
    ajint reclen = capacity;
    ajint offset = start;

    if(end)
    {
        reclen = end - start;

        if(db->IsBlast2)
            reclen++;

        if(reclen > capacity)
            reclen = capacity;
    }

    if(!db->IsBlast2)
        offset++;                       /* skip the '>' character */

    dbiblast_memfseek(hdrfile, offset, 0);
    dbiblast_memfreadS(hdrline, (size_t)1, (size_t)(reclen-1), hdrfile);

    ajStrSetValidLen(hdrline, (reclen-1));

    return reclen;
}
/*
** ajUserGet
**
** Issues a prompt and reads one line of reply from standard input into
** a string.
**
** The prompt is produced by handing fmt and the variadic arguments to
** ajFmtVError. If ajFileValueRedirectStdin() reports that standard
** input is in use, no read is attempted: a notice is issued with ajUser,
** the string is set to "" and its length is returned.
**
** Otherwise the line is read with fgets into the string's own buffer,
** which starts at ajFileValueBuffsize() and is grown by that amount
** each time a read fills the available space without reaching a
** newline. After reading, one trailing '\n' and then one trailing '\r'
** are removed and the result is passed to ajStrTrimWhite.
**
** If the very first read returns nothing, the function does not return
** normally: at end of input it reports with ajErr and calls ajExitBad,
** otherwise it calls ajFatal with the system error text.
**
** pthis  string that receives the reply
** fmt    format string for the prompt
** ...    arguments for the format
**
** Returns the length of the reply as stored in *pthis.
*/
ajint ajUserGet(AjPStr* pthis, const char* fmt, ...)
{
    AjPStr thys;
    const char *cp;
    char *buff;
    va_list args;
    ajint ipos;
    ajint isize;
    ajint ilen;
    ajint jlen;
    ajint fileBuffSize = ajFileValueBuffsize();

    va_start(args, fmt);
    ajFmtVError(fmt, args);
    va_end(args);

    if(ajFileValueRedirectStdin())
    {
        ajUser("(Standard input in use: using default)");
        ajStrAssignC(pthis, "");

        return ajStrGetLen(*pthis);
    }

    ajStrSetRes(pthis, fileBuffSize);
    buff  = ajStrGetuniquePtr(pthis);
    thys  = *pthis;
    isize = ajStrGetRes(thys);
    ilen  = 0;          /* total characters read so far */
    ipos  = 0;          /* offset in buff where the next read lands */

    while(buff)
    {
#ifndef __ppc__
        cp = fgets(&buff[ipos], isize, stdin);
#else
        cp = ajSysFuncFgets(&buff[ipos], isize, stdin);
#endif

        /* only a failure on the first read is treated as an error */
        if(!cp && !ipos)
        {
            if(feof(stdin))
            {
                ajErr("Unable to get reply from user - end of standard input");
                ajExitBad();
            }
            else
            {
                /* the format needs a conversion for the strerror text */
                ajFatal("Error reading from user: '%s'\n", strerror(errno));
            }
        }

        jlen = strlen(&buff[ipos]);
        ilen += jlen;

        /*
        ** We need to read again if:
        ** We have read the entire buffer
        ** and we don't have a newline at the end
        ** (must be careful about that - we may just have read enough)
        */

        ajStrSetValidLen(pthis, ilen);
        thys = *pthis;

        if((jlen == (isize-1)) &&
           (ajStrGetCharLast(thys) != '\n'))
        {
            ajStrSetRes(pthis, ajStrGetRes(thys)+fileBuffSize);
            thys = *pthis;
            ipos += jlen;

            /* re-fetch the pointer after changing the reserved size */
            buff = ajStrGetuniquePtr(pthis);
            isize = ajStrGetRes(thys) - ipos;
        }
        else
            buff = NULL;                /* complete line: leave the loop */
    }

    ajStrSetValidLen(pthis, ilen);

    if(ajStrGetCharLast(*pthis) == '\n')
        ajStrCutEnd(pthis, 1);

    /* PC files have \r\n Macintosh files have just \r : this fixes both */

    if(ajStrGetCharLast(*pthis) == '\r')
        ajStrCutEnd(pthis, 1);

    ajStrTrimWhite(pthis);

    return ajStrGetLen(*pthis);
}
static AjBool dbiblast_blastopenlib(const AjPStr name, AjBool usesrc, ajint blastv, char dbtype, PBlastDb* pdb) { AjPStr hname = NULL; AjPStr sname = NULL; AjPStr tname = NULL; static AjPStr dbname = NULL; ajint rdtmp = 0; ajint rdtmp2 = 0; ajint itype; ajint ttop; PMemFile TFile = NULL; PBlastDb ret; for(itype=0; blasttypes[itype].ExtT; itype++) { if((blastv == 1) && blasttypes[itype].IsBlast2) continue; if((blastv == 2) && !blasttypes[itype].IsBlast2) continue; if((dbtype == 'P') && !blasttypes[itype].IsProtein) continue; if((dbtype == 'N') && blasttypes[itype].IsProtein) continue; if(dbiblast_wrongtype(name, blasttypes[itype].ExtT)) continue; dbiblast_dbname(&dbname,name,blasttypes[itype].ExtT); dbiblast_newname(&tname,dbname,blasttypes[itype].ExtT); TFile = dbiblast_memfopenfile(tname); if(TFile) break; } if(!TFile) return ajFalse; AJNEW0(*pdb); ret = *pdb; ret->TFile = TFile; ajStrAssignS(&ret->Name, dbname); ajDebug("Name '%S'\n", ret->Name); /* find and open the 'table' file(s) */ if(!ret->TFile) ajFatal(" cannot open %S table file %S\n", dbname, tname); ajDebug("Successfully opened table file for type %d\n", itype); ret->IsProtein = blasttypes[itype].IsProtein; ret->IsBlast2 = blasttypes[itype].IsBlast2; /* read the type and format - all databases */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DbType); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DbFormat); ret->HeaderLen += 8; ajDebug("dbtype: %x dbformat: %x\n", ret->DbType, ret->DbFormat); /* Open the header and (compressed) sequence files */ /* for DNA, also look for the FASTA file */ dbiblast_newname(&hname,dbname,blasttypes[itype].ExtH); if((ret->HFile = dbiblast_memfopenfile(hname))==NULL) ajFatal(" cannot open %S header file\n",hname); dbiblast_newname(&sname,dbname,blasttypes[itype].ExtS); if((ret->SFile = dbiblast_memfopenfile(sname))==NULL) ajFatal(" cannot open %S sequence file\n",sname); if(!ret->IsBlast2 && !ret->IsProtein && usesrc) /* this can fail */ if((ret->FFile = 
dbiblast_memfopenfile(dbname))==NULL) ajDebug(" cannot open %S source file\n",dbname); /* read the title - all formats */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TitleLen); /* blast2 does not align after the title */ if(ret->IsBlast2) rdtmp = ret->TitleLen; else rdtmp = ret->TitleLen + ((ret->TitleLen%4 !=0 ) ? 4-(ret->TitleLen%4) : 0); ajStrAssignResC(&ret->Title, rdtmp+1, ""); ajDebug("IsBlast2: %B title_len: %d rdtmp: %d title_str: '%S'\n", ret->IsBlast2, ret->TitleLen, rdtmp, ret->Title); ajStrTrace(ret->Title); dbiblast_memfreadS(&ret->Title,(size_t)1,(size_t)rdtmp,ret->TFile); if(ret->IsBlast2) ajStrSetValidLen(&ret->Title, ret->TitleLen); else ajStrSetValidLen(&ret->Title, ret->TitleLen-1); ajDebug("title_len: %d rdtmp: %d title_str: '%S'\n", ret->TitleLen, rdtmp, ret->Title); ret->HeaderLen += 4 + rdtmp; /* read the date - blast2 */ if(ret->IsBlast2) { dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DateLen); rdtmp2 = ret->DateLen; ajStrAssignResC(&ret->Date, rdtmp2+1, ""); dbiblast_memfreadS(&ret->Date,(size_t)1,(size_t)rdtmp2,ret->TFile); ajStrSetValid(&ret->Date); ret->DateLen = ajStrGetLen(ret->Date); ajDebug("datelen: %d rdtmp: %d date: '%S'\n", ret->DateLen, rdtmp2, ret->Date); ret->HeaderLen += 4 + rdtmp2; } /* read the rest of the header (different for protein and DNA) */ if(!ret->IsBlast2 && !ret->IsProtein) { /* length of source lines */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->LineLen); ret->HeaderLen += 4; } /* all formats have the next 3 */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->Size); if(ret->IsProtein) { /* mad, but they are the other way for DNA */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TotLen); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->MaxSeqLen); } else { dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->MaxSeqLen); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TotLen); } ret->HeaderLen += 12; if(!ret->IsBlast2 && !ret->IsProtein) { /* Blast 1.4 DNA only */ /* compressed db length */ 
dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->CompLen); /* count of nt's cleaned */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->CleanCount); ret->HeaderLen += 8; } ajDebug(" size: %u, totlen: %d maxseqlen: %u\n", ret->Size, ret->TotLen, ret->MaxSeqLen); ajDebug(" linelen: %u, complen: %d cleancount: %d\n", ret->LineLen, ret->CompLen, ret->CleanCount); /* Now for the tables of offsets. Again maddeningly different in each */ if(ret->IsBlast2) { ttop = ret->TopHdr = ret->HeaderLen; /* header first */ ttop = ret->TopCmp = ttop + (ret->Size+1) * 4; /* then sequence */ if(!ret->IsProtein) /* Blast 2 DNA only */ ttop = ret->TopAmb = ttop + (ret->Size+1) * 4; } else { ttop = ret->TopCmp = ret->HeaderLen + ret->CleanCount*4; /* comp seq */ if(!ret->IsProtein) /* Blast 1.4 DNA only */ ttop = ret->TopSrc = ttop + (ret->Size+1) * 4; ttop = ret->TopHdr = ttop + (ret->Size+1) * 4; /* headers for all */ if(!ret->IsProtein) /* Blast 1.4 DNA only */ ttop = ret->TopAmb = ttop + (ret->Size+1) * 4; } ajDebug("table file index starts at %d\n", ret->HeaderLen); ajDebug("table file csq starts at %d\n", ret->TopCmp); ajDebug("table file src starts at %d\n", ret->TopSrc); ajDebug("table file hdr starts at %d\n", ret->TopHdr); ajDebug("table file amb starts at %d\n", ret->TopAmb); ajStrDel(&hname); ajStrDel(&sname); ajStrDel(&tname); ajStrDel(&dbname); return ajTrue; }