void IdnaConfTest::Test(void){ if (!ReadAndConvertFile())return; UnicodeString s; UnicodeString key; UnicodeString value; // skip everything before the first "=====" and "=====" itself do { if (!ReadOneLine(s)) { errln("End of file prematurely found"); break; } } while (s.compare(C_TAG, -1) != 0); //"=====" while(ReadOneLine(s)){ s.trim(); key.remove(); value.remove(); if (s.compare(C_TAG, -1) == 0){ //"=====" Call(); } else { // explain key:value int p = s.indexOf((UChar)0x3A); // : key.setTo(s,0,p).trim(); value.setTo(s,p+1).trim(); if (key.compare(C_TYPE, -1) == 0){ if (value.compare(C_TOASCII, -1) == 0) { type = 0; } else if (value.compare(C_TOUNICODE, -1) == 0){ type = 1; } } else if (key.compare(C_PASSFAIL, -1) == 0){ if (value.compare(C_PASS, -1) == 0){ passfail = 0; } else if (value.compare(C_FAIL, -1) == 0){ passfail = 1; } } else if (key.compare(C_DESC, -1) == 0){ if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){ option = 1; // not found } else { option = 0; } id.setTo(value, 0, value.indexOf((UChar)0x20)); // space } else if (key.compare(C_NAMEZONE, -1) == 0){ ExplainCodePointTag(value); namezone.setTo(value); } else if (key.compare(C_NAMEBASE, -1) == 0){ ExplainCodePointTag(value); namebase.setTo(value); } // just skip other lines } } Call(); // for last record }
/* ReadOneLine: read the next significant line of fp into line.
 *
 * Lines whose first character is '#', LF (0x0a) or CR (0x0d) are treated as
 * comments/blank lines and skipped.  At most 256 bytes (including the
 * terminating NUL) are stored, so line must point to a buffer of at least
 * 256 chars.
 *
 * Returns TRUE when a significant line was stored in line, FALSE when the
 * end of the file (or a read error) was reached first; in the latter case
 * line is left as an empty string.
 */
static Boolean ReadOneLine(FILE * fp, char *line)
{
   for (;;) {
      /* The EOF indicator is only raised after a read has failed, so the
         result of fgets itself is what tells us no more data is available. */
      if (fgets(line, 256, fp) == NULL) {
         line[0] = 0;
         return FALSE;
      }

      /* skip comment and empty lines, iterating instead of recursing so a
         long run of them cannot exhaust the stack */
      if (line[0] != '#' && line[0] != 0x0a && line[0] != 0x0d)
         return TRUE;
   }
}
/* LoadScaleRate: Load scale rate file for parameter updating
 *
 * The file named by scalefile is read with ReadOneLine and consists of
 * three consecutive sections, filling mtInfo->SRMean, mtInfo->SRVar and
 * mtInfo->DWght in that order.  Each section starts with one line holding a
 * global scale factor, followed by one value per (stream p, index i) with
 * p = 1..genInfo->nPdfStream[0] and i = 1..genInfo->pst[p].order; every
 * stored value is the parsed number multiplied by the section's scale.
 *
 * If the file cannot be opened a message is written to stderr and the
 * tables are left untouched.
 */
static void LoadScaleRate(char *scalefile)
{
   char line[256];              /* ReadOneLine stores at most 256 bytes */
   FILE *fp;
   int i, p;
   float fScale;

   if (scalefile == NULL || (fp = fopen(scalefile, "rt")) == NULL) {
      /* NOTE(review): switch to the project's own error reporting routine
         if one is expected here */
      fprintf(stderr, "LoadScaleRate: cannot open scale rate file %s\n",
              scalefile == NULL ? "(null)" : scalefile);
      return;
   }

   /* section 1: scale factor, then the mean scale rates */
   ReadOneLine(fp, line);
   fScale = (float) atof(line);
   for (p = 1; p <= genInfo->nPdfStream[0]; p++) {
      for (i = 1; i <= genInfo->pst[p].order; i++) {
         /* running out of input ends this stream only; the outer loop
            continues and simply finds nothing more to read */
         if (!ReadOneLine(fp, line))
            break;
         mtInfo->SRMean[p][i] = (float) atof(line) * fScale;
      }
   }

   /* section 2: scale factor, then the variance scale rates */
   ReadOneLine(fp, line);
   fScale = (float) atof(line);
   for (p = 1; p <= genInfo->nPdfStream[0]; p++) {
      for (i = 1; i <= genInfo->pst[p].order; i++) {
         if (!ReadOneLine(fp, line))
            break;
         mtInfo->SRVar[p][i] = (float) atof(line) * fScale;
      }
   }

   /* section 3: scale factor, then the DWght values */
   ReadOneLine(fp, line);
   fScale = (float) atof(line);
   for (p = 1; p <= genInfo->nPdfStream[0]; p++) {
      for (i = 1; i <= genInfo->pst[p].order; i++) {
         if (!ReadOneLine(fp, line))
            break;
         mtInfo->DWght[p][i] = (float) atof(line) * fScale;
      }
   }

   fclose(fp);
}
/* Function: SeqfileFormat()
 *
 * Purpose:  Determine format of seqfile, and return it
 *           through ret_format. From Gilbert's seqFileFormat().
 *
 *           If filename is "-", we will read from stdin and
 *           assume that the stream is coming in FASTA format --
 *           either unaligned or aligned.
 *
 *           The file is scanned one line at a time. Some line
 *           patterns identify a format with certainty and stop the
 *           scan at once; others are only remembered as hints, and a
 *           decision among them is made once more than one DNA/RNA
 *           line longer than 20 characters has been seen, the end of
 *           the file is reached, or more than 500 lines were read.
 *
 * Args:     filename   - name of sequence file
 *           ret_format - RETURN: format code for file, see squid.h
 *                        for codes.
 *           env        - name of environment variable containing
 *                        a directory path that filename might also be
 *                        found in. "BLASTDB", for example. Can be NULL.
 *
 * Return:   1 on success, 0 on failure.
 *           On failure squid_errno is set to SQERR_NOFILE (file could
 *           not be opened) or SQERR_FORMAT (no format recognized), and
 *           *ret_format is not written.
 */
int
SeqfileFormat(char *filename, int *ret_format, char *env)
{
  int   foundIG      = 0;
  int   foundStrider = 0;
  int   foundGB      = 0;
  int   foundEMBL    = 0;
  int   foundPearson = 0;
  int   foundZuker   = 0;
  int   gotGCGdata   = 0;
  int   gotPIR       = 0;
  int   gotSquid     = 0;
  int   gotuw        = 0;
  int   gotMSF       = 0;
  int   gotClustal   = 0;
  int   done         = 0;
  int   format       = kUnknown;
  int   nlines= 0, dnalines= 0;
  int   splen = 0;
  char  sp[LINEBUFLEN];
  FILE *fseq;

  /* First check if filename is "-": special case indicating
   * a FASTA pipe.
   */
  if (strcmp(filename, "-") == 0)
    { *ret_format = kPearson; return 1; }

  if ((fseq = fopen(filename, "r")) == NULL &&
      (fseq = EnvFileOpen(filename, env)) == NULL)
    { squid_errno = SQERR_NOFILE; return 0; }

  /* Look at a line at a time
   */
  while ( !done )
    {
      /* Fetch the next line. EOF is sampled before the read, so the
       * pass on which EOF is first noticed still classifies whatever
       * readline() left in sp, but does not count it in nlines/splen.
       */
      done |= (feof(fseq));
      readline(fseq, sp);
      if (!done) { splen = (int) strlen(sp); ++nlines; }

      if (*sp == '\0')
	/*EMPTY*/ ;

      /* high probability identities: */

      else if (strstr(sp, " MSF:")   != NULL &&
	       strstr(sp, " Type:")  != NULL &&
	       strstr(sp, " Check:") != NULL)
	gotMSF = 1;

      else if (strncmp(sp, "CLUSTAL ", 8) == 0 &&
	       strstr( sp, "multiple sequence alignment"))
	gotClustal = 1;

      else if (strstr(sp," Check: ") != NULL)
	gotuw= 1;

      else if (strncmp(sp, "///", 3) == 0 || strncmp(sp, "ENTRY ", 6) == 0)
	gotPIR = 1;

      else if (strncmp(sp, "++", 2) == 0 || strncmp(sp, "NAM ", 4) == 0)
	gotSquid = 1;

      else if (strncmp(sp, ">>>>", 4) == 0 && strstr(sp, "Len: "))
	gotGCGdata = 1;

      /* uncertain identities: */

      else if (*sp ==';')
	{
	  if (strstr(sp,"Strider") !=NULL) foundStrider= 1;
	  else foundIG= 1;
	}
      /* NOTE(review): only the first 5 characters of "ORIGIN" are
       * compared here; kept as in the original. */
      else if (strncmp(sp,"LOCUS",5) == 0 || strncmp(sp,"ORIGIN",5) == 0)
	foundGB= 1;

      else if (*sp == '>')
	{
	  foundPearson  = 1;
	}

      else if (strstr(sp,"ID   ") == sp || strstr(sp,"SQ   ") == sp)
	foundEMBL= 1;

      else if (*sp == '(')
	foundZuker= 1;

      else
	{
	  switch (Seqtype( sp ))
	    {
	    case kDNA:
	    case kRNA: if (splen>20) dnalines++; break;
	    default:   break;
	    }
	}

      if      (gotMSF)     {format = kMSF;     done = 1; }
      else if (gotClustal) {format = kClustal; done = 1; }
      else if (gotSquid)   {format = kSquid;   done = 1; }
      else if (gotPIR)     {format = kPIR;     done = 1; }
      else if (gotGCGdata) {format = kGCGdata; done = 1; }
      else if (gotuw)
	{
	  if (foundIG) format= kIG;   /* a TOIG file from GCG for certain */
	  else         format= kGCG;
	  done= 1;
	}
      else if ((dnalines > 1) || done || (nlines > 500))
	{
	  /* decide on most likely format */
	  /* multichar idents: */
	  if      (foundStrider) format= kStrider;
	  else if (foundGB)      format= kGenBank;
	  else if (foundEMBL)    format= kEMBL;
	  /* single char idents: */
	  else if (foundIG)      format= kIG;
	  else if (foundPearson) format= kPearson;
	  else if (foundZuker)   format= kZuker;
	  /* spacing ident: */
	  else if (IsSELEXFormat(filename)) format= kSelex;
	  /* no format chars: */
	  else
	    {
	      /* release the file before bailing out so the handle is
	       * not leaked on unrecognized input */
	      fclose(fseq);
	      squid_errno = SQERR_FORMAT;
	      return 0;
	    }

	  done= 1;
	}
    }

  fclose(fseq);

  *ret_format = format;
  return 1;
}