static AjBool dbiblast_blastopenlib(const AjPStr name, AjBool usesrc, ajint blastv, char dbtype, PBlastDb* pdb) { AjPStr hname = NULL; AjPStr sname = NULL; AjPStr tname = NULL; static AjPStr dbname = NULL; ajint rdtmp = 0; ajint rdtmp2 = 0; ajint itype; ajint ttop; PMemFile TFile = NULL; PBlastDb ret; for(itype=0; blasttypes[itype].ExtT; itype++) { if((blastv == 1) && blasttypes[itype].IsBlast2) continue; if((blastv == 2) && !blasttypes[itype].IsBlast2) continue; if((dbtype == 'P') && !blasttypes[itype].IsProtein) continue; if((dbtype == 'N') && blasttypes[itype].IsProtein) continue; if(dbiblast_wrongtype(name, blasttypes[itype].ExtT)) continue; dbiblast_dbname(&dbname,name,blasttypes[itype].ExtT); dbiblast_newname(&tname,dbname,blasttypes[itype].ExtT); TFile = dbiblast_memfopenfile(tname); if(TFile) break; } if(!TFile) return ajFalse; AJNEW0(*pdb); ret = *pdb; ret->TFile = TFile; ajStrAssignS(&ret->Name, dbname); ajDebug("Name '%S'\n", ret->Name); /* find and open the 'table' file(s) */ if(!ret->TFile) ajFatal(" cannot open %S table file %S\n", dbname, tname); ajDebug("Successfully opened table file for type %d\n", itype); ret->IsProtein = blasttypes[itype].IsProtein; ret->IsBlast2 = blasttypes[itype].IsBlast2; /* read the type and format - all databases */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DbType); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DbFormat); ret->HeaderLen += 8; ajDebug("dbtype: %x dbformat: %x\n", ret->DbType, ret->DbFormat); /* Open the header and (compressed) sequence files */ /* for DNA, also look for the FASTA file */ dbiblast_newname(&hname,dbname,blasttypes[itype].ExtH); if((ret->HFile = dbiblast_memfopenfile(hname))==NULL) ajFatal(" cannot open %S header file\n",hname); dbiblast_newname(&sname,dbname,blasttypes[itype].ExtS); if((ret->SFile = dbiblast_memfopenfile(sname))==NULL) ajFatal(" cannot open %S sequence file\n",sname); if(!ret->IsBlast2 && !ret->IsProtein && usesrc) /* this can fail */ if((ret->FFile = 
dbiblast_memfopenfile(dbname))==NULL) ajDebug(" cannot open %S source file\n",dbname); /* read the title - all formats */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TitleLen); /* blast2 does not align after the title */ if(ret->IsBlast2) rdtmp = ret->TitleLen; else rdtmp = ret->TitleLen + ((ret->TitleLen%4 !=0 ) ? 4-(ret->TitleLen%4) : 0); ajStrAssignResC(&ret->Title, rdtmp+1, ""); ajDebug("IsBlast2: %B title_len: %d rdtmp: %d title_str: '%S'\n", ret->IsBlast2, ret->TitleLen, rdtmp, ret->Title); ajStrTrace(ret->Title); dbiblast_memfreadS(&ret->Title,(size_t)1,(size_t)rdtmp,ret->TFile); if(ret->IsBlast2) ajStrSetValidLen(&ret->Title, ret->TitleLen); else ajStrSetValidLen(&ret->Title, ret->TitleLen-1); ajDebug("title_len: %d rdtmp: %d title_str: '%S'\n", ret->TitleLen, rdtmp, ret->Title); ret->HeaderLen += 4 + rdtmp; /* read the date - blast2 */ if(ret->IsBlast2) { dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->DateLen); rdtmp2 = ret->DateLen; ajStrAssignResC(&ret->Date, rdtmp2+1, ""); dbiblast_memfreadS(&ret->Date,(size_t)1,(size_t)rdtmp2,ret->TFile); ajStrSetValid(&ret->Date); ret->DateLen = ajStrGetLen(ret->Date); ajDebug("datelen: %d rdtmp: %d date: '%S'\n", ret->DateLen, rdtmp2, ret->Date); ret->HeaderLen += 4 + rdtmp2; } /* read the rest of the header (different for protein and DNA) */ if(!ret->IsBlast2 && !ret->IsProtein) { /* length of source lines */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->LineLen); ret->HeaderLen += 4; } /* all formats have the next 3 */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->Size); if(ret->IsProtein) { /* mad, but they are the other way for DNA */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TotLen); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->MaxSeqLen); } else { dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->MaxSeqLen); dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->TotLen); } ret->HeaderLen += 12; if(!ret->IsBlast2 && !ret->IsProtein) { /* Blast 1.4 DNA only */ /* compressed db length */ 
dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->CompLen); /* count of nt's cleaned */ dbiblast_memreadUInt4(ret->TFile,(ajuint*)&ret->CleanCount); ret->HeaderLen += 8; } ajDebug(" size: %u, totlen: %d maxseqlen: %u\n", ret->Size, ret->TotLen, ret->MaxSeqLen); ajDebug(" linelen: %u, complen: %d cleancount: %d\n", ret->LineLen, ret->CompLen, ret->CleanCount); /* Now for the tables of offsets. Again maddeningly different in each */ if(ret->IsBlast2) { ttop = ret->TopHdr = ret->HeaderLen; /* header first */ ttop = ret->TopCmp = ttop + (ret->Size+1) * 4; /* then sequence */ if(!ret->IsProtein) /* Blast 2 DNA only */ ttop = ret->TopAmb = ttop + (ret->Size+1) * 4; } else { ttop = ret->TopCmp = ret->HeaderLen + ret->CleanCount*4; /* comp seq */ if(!ret->IsProtein) /* Blast 1.4 DNA only */ ttop = ret->TopSrc = ttop + (ret->Size+1) * 4; ttop = ret->TopHdr = ttop + (ret->Size+1) * 4; /* headers for all */ if(!ret->IsProtein) /* Blast 1.4 DNA only */ ttop = ret->TopAmb = ttop + (ret->Size+1) * 4; } ajDebug("table file index starts at %d\n", ret->HeaderLen); ajDebug("table file csq starts at %d\n", ret->TopCmp); ajDebug("table file src starts at %d\n", ret->TopSrc); ajDebug("table file hdr starts at %d\n", ret->TopHdr); ajDebug("table file amb starts at %d\n", ret->TopAmb); ajStrDel(&hname); ajStrDel(&sname); ajStrDel(&tname); ajStrDel(&dbname); return ajTrue; }
/* @func ajReadlinePos ********************************************************
**
** Reads one line from a file and reports the file position at which that
** line started.
**
** The line is built up in file->Buff, which is enlarged as required for
** long lines. When file->Readblock is set (and __ppc__ is not defined) data
** are taken from that block, refilled with fread(); otherwise fgets() (or
** ajSysFuncFgets() under __ppc__) is used. A newline is appended if the
** text read does not end with one.
**
** @param [u] file [AjPFile] Input file; Buff, Filepos, End and the block
**                           read fields are updated
** @param [w] Pdest [AjPStr*] Deleted on entry; on success assigned from
**                            file->Buff with ajStrAssignRef, at end of
**                            file cleared with ajStrAssignClear
** @param [w] Ppos [ajlong*] Receives file->Filepos as it was on entry
** @return [AjBool] ajTrue if a line was read, ajFalse at end of file.
**                  A read error is fatal (ajFatal).
******************************************************************************/

AjBool ajReadlinePos(AjPFile file, AjPStr* Pdest, ajlong* Ppos)
{
    const char *cp;
    char *buff;
    ajint isize;
    ajint ilen;
    ajint jlen;
    ajint ipos;
    ajuint buffsize;
    size_t iread;
    const char* pnewline = NULL;

    MAJSTRDEL(Pdest);

    if(file->Buffsize)
        buffsize = file->Buffsize;
    else
        buffsize = ajFileValueBuffsize();

    if(!file->Buff)
        ajStrAssignResC(&file->Buff, buffsize, "");
    else if(buffsize > MAJSTRGETRES(file->Buff))
        ajStrSetRes(&file->Buff, buffsize);

    /* we write into the buffer directly, so it must not be shared */
    if(MAJSTRGETUSE(file->Buff) == 1)
        buff = MAJSTRGETPTR(file->Buff);
    else
        buff = ajStrGetuniquePtr(&file->Buff);

    isize = MAJSTRGETRES(file->Buff);
    ilen  = 0;
    ipos  = 0;

    if(!file->fp)
        ajWarn("ajFileGets file not found");

    *Ppos = file->Filepos;

    /* buff is set to NULL once a complete line has been collected */
    while(buff)
    {
        if(file->End)
        {
            ajStrAssignClear(Pdest);
            ajDebug("at EOF: File already read to end %F\n", file);

            return ajFalse;
        }

#ifndef __ppc__
        if(file->Readblock)
        {
            if(file->Blockpos >= file->Blocklen)
            {
                /* block exhausted: refill it */
                iread = fread(file->Readblock,
                              1, file->Blocksize,
                              file->fp);

                /*
                ** NOTE(review): ferror() returns an error indicator, not
                ** an errno value, so the strerror() text below may not
                ** describe the real failure - consider reporting errno.
                */
                if(!iread && ferror(file->fp))
                    ajFatal("fread failed with error:%d '%s'",
                            ferror(file->fp), strerror(ferror(file->fp)));

                file->Blockpos = 0;
                file->Blocklen = iread;
                file->Readblock[iread] = '\0';

                /*ajDebug("++ fread %u Ppos:%Ld\n", iread, *Ppos);*/
            }

            if(file->Blockpos < file->Blocklen)
            {
                /* we know we have something in Readblock to process */

                pnewline = strchr(&file->Readblock[file->Blockpos], '\n');

                if(pnewline)
                    jlen = pnewline - &file->Readblock[file->Blockpos] + 1;
                else
                    jlen = file->Blocklen - file->Blockpos;

                /*ajDebug("ipos:%d jlen:%d pnewline:%p "
                        "Readblock:%p blockpos:%d blocklen:%d\n",
                        ipos, jlen, pnewline, file->Readblock,
                        file->Blockpos, file->Blocklen);*/

                memmove(&buff[ipos], &file->Readblock[file->Blockpos], jlen);
                buff[ipos+jlen]='\0';
                cp = &buff[ipos];
                file->Blockpos += jlen;
            }
            else
            {
                /* nothing more could be read */
                jlen = 0;
                cp = NULL;
            }
        }
        else
        {
            cp = fgets(&buff[ipos], isize, file->fp);
            jlen = strlen(&buff[ipos]);
        }
#else
        cp = ajSysFuncFgets(&buff[ipos], isize, file->fp);
        jlen = strlen(&buff[ipos]);
#endif

        /* nothing read and nothing collected so far: EOF or error */
        if(!cp && !ipos)
        {
            if(feof(file->fp))
            {
                file->End = ajTrue;
                ajStrAssignClear(Pdest);
                ajDebug("EOF ajFileGetsL file %F\n", file);

                return ajFalse;
            }
            else
                ajFatal("Error reading from file '%S'\n",
                        ajFileGetNameS(file));
        }

        ilen += jlen;
        file->Filepos += jlen;

        /*
        ** We need to read again if:
        ** this pass actually produced data
        ** and we have read the entire buffer (or block)
        ** and we don't have a newline at the end
        ** (must be careful about that - we may just have read enough)
        **
        ** The test on cp matters in block mode: when the input ends
        ** without a newline the next pass reads nothing, pnewline is still
        ** NULL and the last character is still not a newline, so without
        ** it the loop would never terminate.
        */

        if(cp &&
           ((file->Readblock && !pnewline) || (jlen == (isize-1))) &&
           (buff[ilen-1] != '\n'))
        {
            MAJSTRSETVALIDLEN(&file->Buff, ilen); /* fix before resizing! */
            ajStrSetResRound(&file->Buff, ilen+buffsize+1);

            /*ajDebug("more to do: jlen: %d ipos: %d isize: %d ilen: %d "
                    "Size: %d\n",
                    jlen, ipos, isize, ilen, ajStrGetRes(file->Buff));*/

            ipos += jlen;
            buff = ajStrGetuniquePtr(&file->Buff);
            isize = ajStrGetRes(file->Buff) - ipos;

            /*ajDebug("expand to: ipos: %d isize: %d Size: %d\n",
                    ipos, isize, ajStrGetRes(file->Buff));*/
        }
        else
            buff = NULL;
    }

    MAJSTRSETVALIDLEN(&file->Buff, ilen);

    if(ajStrGetCharLast(file->Buff) != '\n')
    {
        /*ajDebug("Appending missing newline to '%S'\n", file->Buff);*/
        ajStrAppendK(&file->Buff, '\n');
    }

    ajStrAssignRef(Pdest, file->Buff);

    /*
    if(file->Readblock)
        ajDebug("ajFileGetsL done blocklen:%d blockpos:%d readlen:%u\n",
                file->Blocklen, file->Blockpos, ajStrGetLen(file->Buff));
    */

    return ajTrue;
}
int main(int argc, char **argv) { /* ** All pointers set to NULL for safety. ** Variables names and initialisation values aligned for clarity. */ AjBool boo = ajFalse; ajint n1 = 0; ajint n2 = 0; ajlong l1 = 0; /* long int */ float f1 = 0.0; double d1 = 0.0; /* there is no long double */ size_t size = 100; /* Reserved memory size. Could be any value you know in advance. */ embInit("demostringnew", argc, argv); demostringnew_msg("/* Starting string values */"); /* Functions with the prefix ajStr are for manipulating EMBOSS strings. Functions with the prefix ajChar are for manipulating C-type (char*) string See filesection and datasection sections in ajstr.c */ /* ** String constructor functions ** See "@section constructors" in ajstr.c */ /* Construct a new string with no starting value or reserved size. There is no equivlent function for C-type (char*) strings */ str0 = ajStrNew (); /* Construct a new string with a reserved size but no starting value */ txt1 = ajCharNewRes(size); str1 = ajStrNewRes (size); /* Construct a new C-type (char*) string with a starting value ... */ txt2 = ajCharNewC ("Starting value"); /* ... copied from a C-type (char*) string */ str2 = ajStrNewC (txt2); /* ... copied from a C-type (char*) string */ txt3 = ajCharNewS (str2); /* ... copied from a string */ str3 = ajStrNewS (str2); /* ... copied from a string */ /* Construct a new string with a reserved size and starting value ... */ txt4 = ajCharNewResC("Starting value, reserved size", size); /* ... copied from a C-type (char*) string)*/ str4 = ajStrNewResC (txt4, size); /* ... copied from a C-type (char*) string */ /* or str4 = ajStrNewResLenC(txt4, size, strlen(txt4)); to specify string length */ txt5 = ajCharNewResS(str4, size); /* ... copied from a string */ str5 = ajStrNewResS (str4, size); /* ... 
copied from a string */ demostringnew_msg("/* After string constructor functions */"); /* ** String destructor functions ** See "@section destructors" in ajstr.c) */ /* Destruct a string */ ajCharDel(&txt1); ajCharDel(&txt2); ajCharDel(&txt3); ajCharDel(&txt4); ajCharDel(&txt5); ajStrDel (&str0); ajStrDel (&str1); ajStrDel (&str3); ajStrDel (&str5); /* str2 & str4 still in memory */ demostringnew_msg("/* After string destructor functions */"); /* ** String (de)referencing functions ** See "@section destructors" in ajstr.c) */ str0 = ajStrNewRef(str2); /* or ajStrAssignRef(&str0, str2); */ demostringnew_msg("/* After string reference */"); ajStrDelStatic(&str0); demostringnew_msg("/* After string dereference */"); /* ** String assignment functions ** See "@section assignment" in ajstr.c) */ /* Still only str2 & str4 in memory */ /* Assign a string value using ... */ ajStrAssignC(&str1, "Assigned value"); /* ... a C-type (char*) string */ /* or ajStrAssignLenC(&str1, "Assigned value", strlen("Assigned value")); to specify string length. */ ajStrAssignS(&str3, str1); /* ... a string */ ajStrAssignK(&str5, 'A'); /* ... a character */ demostringnew_msg("/* After string assignment 1 */"); ajStrAssignSubC(&str1, "Assigned value", 0, 11); ajStrAssignSubS(&str3, str1, 0, 9); demostringnew_msg("/* After string assignment 2 */"); /* The assignment functions allocate memory if necessary so str1, str3 and str5 will be created for you. It's bad practice to use this mechanism however because it's not obvious the string has been allocated (and needs freeing). Much cleaner to call the construct (ajStrNew) explicitly. */ /* Assign a string with a reserved size and value using ... */ ajStrAssignResC(&str1, size, "Assigned value, reserved size"); /* ... a C-type (char*) string */ ajStrAssignResS(&str3, size, str1); /* ... a string */ demostringnew_msg("/* After string assignment 3 */"); /* Assign a string value only if the string is empty using ... 
*/ str0 = ajStrNew(); ajStrAssignEmptyC(&str0, "New value if string was empty"); /* ... a C-type (char*) string */ ajStrAssignEmptyS(&str1, str0); /* ... a string */ demostringnew_msg("/* After string assignment 4 */"); /* Now str0-5 in memory. The above code is for illustrative purposes: it's much cleaner to put all the constructors / destructors at the top / bottom of the code where possible. */ /* Assign all strings intuitive values */ txt0 = ajCharNewResC("TEXT 0", 100); txt1 = ajCharNewResC("TEXT 1", 100); txt2 = ajCharNewResC("Text 2", 100); txt3 = ajCharNewResC("Text 3", 100); txt4 = ajCharNewResC("Text 4", 100); txt5 = ajCharNewResC("Text 5", 100); ajStrAssignC(&str0, "STRING 0"); ajStrAssignC(&str1, "STRING 1"); ajStrAssignC(&str2, "String 2"); ajStrAssignC(&str3, "String 3"); ajStrAssignC(&str4, "String 4 WITHSOMETEXTINABLOCK"); ajStrAssignC(&str5, "String 5 WITHSOMETEXTINABLOCK"); demostringnew_msg("/* After string assignment 5 */"); /* ** String formatting functions ** See "@section formatting" in ajstr.c */ ajCharFmtLower(txt0); ajCharFmtLower(txt1); ajStrFmtLower(&str0); ajStrFmtLowerSub(&str1, 0, 2); ajCharFmtUpper(txt2); ajCharFmtUpper(txt3); ajStrFmtUpper(&str2); ajStrFmtUpperSub(&str3, 0, 2); demostringnew_msg("/* After string formatting 1 */"); ajStrFmtTitle(&str0); ajStrFmtQuote(&str1); ajStrFmtBlock(&str4, 3); demostringnew_msg("/* After string formatting 2 */"); /* See also ajStrFmtWrap, ajStrFmtWrapLeft ... these need checking. 
*/ /* ** String conversion functions ** See "@section datatype to string conversion" in ajstr.c */ n1 = n2 = l1 = 1; f1 = d1 = 0.5; ajStrFromBool( &str0, boo); ajStrFromInt(&str1, n1); ajStrFromLong(&str2, l1); ajStrFromFloat(&str3, f1, 5); ajStrFromDouble(&str4, d1, 5); ajStrFromDoubleExp(&str5, d1, 5); demostringnew_msg("/* After datatype to string conversion */"); /* ** String conversion functions ** See "@section string to datatype conversion" in ajstr.c */ ajStrToBool(str0, &boo); ajStrToInt(str1, &n1); ajStrToLong(str2, &l1); ajStrToDouble(str4, &d1); ajUser("/* After string to datatype conversion */\n" "boo (from str0): %B\nn1 (from str1): %d\nl1 (from str2): %d", boo, n1, l1); ajFmtPrint("f1 (from str3): %f\nd1 (from str4): %f\n", f1, d1); /* Check ajUser ... doesn't support %f */ /* See also ajStrToHex */ /* Assign all strings new values */ strcpy(txt0, "Text String"); strcpy(txt1, "TEXT STRING"); strcpy(txt2, "Text*"); strcpy(txt3, "Text"); strcpy(txt4, "Text String 4"); strcpy(txt5, "Text String 5"); ajStrAssignC(&str0, "String"); ajStrAssignC(&str1, "STRING"); ajStrAssignC(&str2, "String*"); ajStrAssignC(&str3, "*String"); ajStrAssignC(&str4, "String 4"); ajStrAssignC(&str5, "String 5"); demostringnew_msg("/* After resetting strings */"); /* ** String comparison functions ** See "@section comparison" in ajstr.c */ ajUserDumpC("/* String comparison functions */"); boo = ajCharMatchC(txt0, txt1); ajUser("ajCharMatchC(txt0 txt1); == %B", boo); boo = ajCharMatchCaseC(txt0, txt1); ajUser("ajCharMatchCaseC(txt0 txt1); == %B", boo); boo = ajCharMatchC(txt0, txt2); ajUser("ajCharMatchC(txt0,txt2); == %B", boo); boo = ajCharMatchWildC(txt0, txt2); ajUser("ajCharMatchWildC(txt0,txt2); == %B", boo); boo = ajCharMatchWildS(txt0, str2); ajUser("ajCharMatchWildS(txt0,str2); == %B", boo); /* See also ajCharMatchWildNextC, ajCharMatchWildWordC ... these need checking & documentation updated. 
*/ boo = ajCharPrefixC(txt0, txt3); ajUser("ajCharPrefixC(txt0, txt3); == %B", boo); boo = ajCharPrefixS(txt0, str0); ajUser("ajCharPrefixS(txt0, str0); == %B", boo); boo = ajCharPrefixCaseC(txt5, txt1); ajUser("ajCharPrefixCaseC(txt5, txt1); == %B", boo); boo = ajCharPrefixCaseC(txt1, txt5); ajUser("ajCharPrefixCaseC(txt1, txt5); == %B", boo); boo = ajCharPrefixCaseS(txt0, str0); ajUser("ajCharPrefixCaseS(txt0, str0); == %B", boo); boo = ajCharSuffixC(txt0, txt3); ajUser("ajCharSuffixC(txt0, txt3); === %B", boo); boo = ajCharSuffixS(txt0, str0); ajUser("ajCharSuffixS(txt0, str0); == %B", boo); /* See also ajCharSuffixCaseC, ajCharSuffixCaseC, ajCharSuffixCaseS, ajCharSuffixCaseS ... these need checking. */ boo = ajStrMatchC (str0, txt0); ajUser("ajStrMatchC (str0, txt0); == %B", boo); boo = ajStrMatchS(str0, str1); ajUser("ajStrMatchS(str0, str1); == %B", boo); boo = ajStrMatchCaseC(str0, txt0); ajUser("ajStrMatchCaseC(str0, txt0); == %B", boo); boo = ajStrMatchCaseS(str0, str0); ajUser("ajStrMatchCaseS(str0, str0); == %B", boo); /* ajUser("== %B", boo); boo = ajStrMatchWildC(str2, const char* text); ajStrMatchWildS (const AjPStr thys, const AjPStr wild); ajStrMatchWildWordC (const AjPStr str, const char* text); ajStrMatchWildWordS (const AjPStr str, const AjPStr text); ajStrPrefixC(const AjPStr str, const char* txt2); ajStrPrefixS(const AjPStr str, const AjPStr str2); ajStrPrefixCaseC (const AjPStr str, const char* pref); ajStrPrefixCaseS (const AjPStr str, const AjPStr pref); ajStrSuffixC (const AjPStr thys, const char* suff); ajStrSuffixS (const AjPStr thys, const AjPStr suff); */ /**************************************************************************/ /* String substitution functions (See "@section substitution" in ajstr.c) */ /**************************************************************************/ /* AjBool ajStrExchangeCC(AjPStr* Pstr, const char* txt, const char* txtnew); AjBool ajStrExchangeCS(AjPStr* Pstr, const char* txt, const AjPStr strnew); 
AjBool ajStrExchangeKK(AjPStr* Pstr, char chr, char chrnew); AjBool ajStrExchangeSC(AjPStr* Pstr, const AjPStr str, const char* txtnew); AjBool ajStrExchangeSS(AjPStr* Pstr, const AjPStr str, const AjPStr strnew); AjBool ajStrExchangeSetCC(AjPStr* Pstr, const char* oldc, const char* newc); AjBool ajStrExchangeSetSS(AjPStr* Pstr, const AjPStr str, const AjPStr strnew); AjBool ajStrRandom(AjPStr *s); AjBool ajStrReverse(AjPStr* Pstr); */ embExit(); return 0; }