Ejemplo n.º 1
0
void IdnaConfTest::Test(void){
    if (!ReadAndConvertFile())return;

    UnicodeString s;
    UnicodeString key;
    UnicodeString value;

    // skip everything before the first "=====" and "=====" itself
    do {
        if (!ReadOneLine(s)) {
            errln("End of file prematurely found");
            break;
        }
    }
    while (s.compare(C_TAG, -1) != 0);   //"====="

    while(ReadOneLine(s)){
        s.trim();
        key.remove();
        value.remove();
        if (s.compare(C_TAG, -1) == 0){   //"====="
            Call();
       } else {
            // explain      key:value
            int p = s.indexOf((UChar)0x3A);    // :
            key.setTo(s,0,p).trim();
            value.setTo(s,p+1).trim();
            if (key.compare(C_TYPE, -1) == 0){
                if (value.compare(C_TOASCII, -1) == 0) {
                    type = 0;
                } else if (value.compare(C_TOUNICODE, -1) == 0){
                    type = 1;
                }
            } else if (key.compare(C_PASSFAIL, -1) == 0){
                if (value.compare(C_PASS, -1) == 0){
                    passfail = 0;
                } else if (value.compare(C_FAIL, -1) == 0){
                    passfail = 1;
                }
            } else if (key.compare(C_DESC, -1) == 0){
                if (value.indexOf(C_USESTD3ASCIIRULES, u_strlen(C_USESTD3ASCIIRULES), 0) == -1){
                    option = 1; // not found
                } else {
                    option = 0;
                }
                id.setTo(value, 0, value.indexOf((UChar)0x20));    // space
            } else if (key.compare(C_NAMEZONE, -1) == 0){
                ExplainCodePointTag(value);
                namezone.setTo(value);
            } else if (key.compare(C_NAMEBASE, -1) == 0){
                ExplainCodePointTag(value);
                namebase.setTo(value);
            }
            // just skip other lines
        }
    }

    Call(); // for last record
}
Ejemplo n.º 2
0
/* ReadOneLine: read the next data line of fp into line.
 *
 * Lines whose first character is '#', LF (0x0a) or CR (0x0d) are
 * skipped. line must have room for at least 256 chars; a longer input
 * line is delivered in several pieces by successive calls.
 *
 * Returns TRUE when a data line was stored in line, and FALSE when
 * no further line could be read (end of file or read error), in which
 * case line is left as an empty string.
 */
static Boolean ReadOneLine(FILE * fp, char *line)
{
   line[0] = 0;

   /* The result of fgets is the only reliable end-of-input test:
      feof() does not become true until a read has already failed, and
      it never becomes true after a read error. Looping (instead of
      recursing) keeps the stack depth constant however many comment
      or blank lines have to be skipped. */
   while (fgets(line, 256, fp) != NULL) {
      if (line[0] != '#' && line[0] != 0x0a && line[0] != 0x0d)
         return TRUE;
   }

   /* the buffer may hold a skipped line or partial data: clear it */
   line[0] = 0;
   return FALSE;
}
Ejemplo n.º 3
0
/* Tables of mtInfo that a section of the scale rate file can fill */
enum { SCALE_SRMEAN, SCALE_SRVAR, SCALE_DWGHT };

/* LoadScaleSection: read one section of the scale rate file.
 *
 * A section is one line holding a scale factor followed by one value
 * line for every (p, i) with 1 <= p <= genInfo->nPdfStream[0] and
 * 1 <= i <= genInfo->pst[p].order. Each value is multiplied by the
 * scale factor and stored at [p][i] of the table chosen by target
 * (one of the SCALE_* constants). line is scratch space handed to
 * ReadOneLine.
 *
 * Returns 1 when the whole section was read and 0 when ReadOneLine
 * reported that no further line was available.
 */
static int LoadScaleSection(FILE * fp, char *line, int target)
{
   int i, p;
   float fScale, value;

   /* read the scale */
   if (!ReadOneLine(fp, line))
      return 0;
   fScale = (float) atof(line);

   for (p = 1; p <= genInfo->nPdfStream[0]; p++) {
      for (i = 1; i <= genInfo->pst[p].order; i++) {
         if (!ReadOneLine(fp, line))
            return 0;
         value = (float) atof(line) * fScale;
         switch (target) {
         case SCALE_SRMEAN:
            mtInfo->SRMean[p][i] = value;
            break;
         case SCALE_SRVAR:
            mtInfo->SRVar[p][i] = value;
            break;
         default:
            mtInfo->DWght[p][i] = value;
            break;
         }
      }
   }
   return 1;
}

/* LoadScaleRate: Load scale rate file for parameter updating
 *
 * The file consists of three consecutive sections which fill, in this
 * order, mtInfo->SRMean, mtInfo->SRVar and mtInfo->DWght (see
 * LoadScaleSection for the layout of a section).
 *
 * If the file cannot be opened a message is written to stderr and no
 * table is modified. If the file ends early a message is written to
 * stderr and the entries not yet read keep their previous values.
 * NOTE(review): the function has no way to report failure to its
 * caller; confirm whether these conditions should be fatal instead.
 */
static void LoadScaleRate(char *scalefile)
{
   char line[256];              /* same size as the former heap buffer */
   FILE *fp;

   fp = fopen(scalefile, "rt");
   if (fp == NULL) {
      /* previously a NULL stream was passed on to feof/fgets */
      fprintf(stderr, "LoadScaleRate: cannot open scale rate file %s\n",
              scalefile);
      return;
   }

   if (!LoadScaleSection(fp, line, SCALE_SRMEAN) ||
       !LoadScaleSection(fp, line, SCALE_SRVAR) ||
       !LoadScaleSection(fp, line, SCALE_DWGHT))
      fprintf(stderr, "LoadScaleRate: scale rate file %s ended prematurely\n",
              scalefile);

   fclose(fp);
}
Ejemplo n.º 4
0
/* Function: SeqfileFormat()
 * 
 * Purpose:  Determine format of seqfile, and return it
 *           through ret_format. From Gilbert's seqFileFormat().
 *           
 *           If filename is "-", we will read from stdin and
 *           assume that the stream is coming in FASTA format --
 *           either unaligned or aligned.
 *
 *           Otherwise the file is scanned a line at a time. Some
 *           line patterns identify a format with certainty and end
 *           the scan at once; others only leave a hint. The hints
 *           are evaluated once two DNA/RNA-looking lines of more
 *           than 20 characters have been seen, the end of the file
 *           is reached, or more than 500 lines have been read.
 *
 * Args:     filename   - name of sequence file      
 *           ret_format - RETURN: format code for file, see squid.h 
 *                        for codes. Only written on success.
 *           env        - name of environment variable containing
 *                        a directory path that filename might also be
 *                        found in. "BLASTDB", for example. Can be NULL.
 *           
 * Return:   1 on success, 0 on failure. On failure squid_errno is
 *           SQERR_NOFILE (file could not be opened) or SQERR_FORMAT
 *           (no format could be recognized).
 */          
int
SeqfileFormat(char *filename, int  *ret_format, char *env)
{
  int   foundIG      = 0;
  int   foundStrider = 0;
  int   foundGB      = 0; 
  int   foundEMBL    = 0; 
  int   foundPearson = 0;
  int   foundZuker   = 0;
  int   gotGCGdata   = 0;
  int   gotPIR       = 0;
  int   gotSquid     = 0;
  int   gotuw        = 0;
  int   gotMSF       = 0;
  int   gotClustal   = 0;
  int   done         = 0;
  int   format       = kUnknown;
  int   nlines= 0, dnalines= 0;
  int   splen = 0;
  char  sp[LINEBUFLEN];
  FILE *fseq;

  /* First check if filename is "-": special case indicating
   * a FASTA pipe.
   */
  if (strcmp(filename, "-") == 0)
    { *ret_format = kPearson; return 1; }

  /* Fetch the next line into sp. done is raised when the stream was
   * already at end-of-file before the read; splen and nlines are only
   * updated while done is still clear.
   */
#define ReadOneLine(sp)   \
  { done |= (feof(fseq)); \
    readline( fseq, sp);  \
    if (!done) { splen = (int) strlen(sp); ++nlines; } }

  /* Try the name as given first, then the directory path named by env. */
  if ((fseq = fopen(filename, "r")) == NULL &&
      (fseq = EnvFileOpen(filename, env)) == NULL)
    { squid_errno = SQERR_NOFILE;  return 0; }

  /* Look at a line at a time
   */
  while ( !done ) {
    ReadOneLine(sp);

    /* sp is an array and can never be NULL; only emptiness is tested */
    if (*sp=='\0')
      /*EMPTY*/ ; 

    /* high probability identities: */
    
    else if (strstr(sp, " MSF:")   != NULL &&
             strstr(sp, " Type:")  != NULL &&
             strstr(sp, " Check:") != NULL)
      gotMSF = 1;

    else if (strncmp(sp, "CLUSTAL ", 8) == 0 && 
             strstr( sp, "multiple sequence alignment"))
      gotClustal = 1;

    else if (strstr(sp," Check: ") != NULL)
      gotuw= 1;

    else if (strncmp(sp, "///", 3) == 0 || strncmp(sp, "ENTRY ", 6) == 0)
      gotPIR = 1;

    else if (strncmp(sp, "++", 2) == 0 || strncmp(sp, "NAM ", 4) == 0)
      gotSquid = 1;

    else if (strncmp(sp, ">>>>", 4) == 0 && strstr(sp, "Len: "))
      gotGCGdata = 1;

    /* uncertain identities: */

    else if (*sp ==';') {
      if (strstr(sp,"Strider") !=NULL) foundStrider= 1;
      else foundIG= 1;
    }
    /* all six characters of "ORIGIN" are compared (the length used
     * to be 5, which left the final 'N' unchecked) */
    else if (strncmp(sp,"LOCUS",5) == 0 || strncmp(sp,"ORIGIN",6) == 0)
      foundGB= 1;

    else if (*sp == '>') {
      foundPearson  = 1;
    }

    /* strstr() == sp holds only when the tag is at the line start */
    else if (strstr(sp,"ID   ") == sp || strstr(sp,"SQ   ") == sp)
      foundEMBL= 1;

    else if (*sp == '(')
      foundZuker= 1;

    else {
      /* count lines that look like nucleic acid sequence data */
      switch (Seqtype( sp )) {
      case kDNA:
      case kRNA: if (splen>20) dnalines++; break;
      default:   break;
      }
    }

    if      (gotMSF)     {format = kMSF;     done = 1; }
    else if (gotClustal) {format = kClustal; done = 1; }
    else if (gotSquid)   {format = kSquid;   done = 1; }
    else if (gotPIR)     {format = kPIR;     done = 1; }
    else if (gotGCGdata) {format = kGCGdata; done = 1; }
    else if (gotuw)  
      {
        if (foundIG) format= kIG;  /* a TOIG file from GCG for certain */
        else format= kGCG;
        done= 1;
      }
    else if ((dnalines > 1) || done || (nlines > 500)) {
      /* decide on most likely format */
      /* multichar idents: */
      if (foundStrider)      format= kStrider;
      else if (foundGB)      format= kGenBank;
      else if (foundEMBL)    format= kEMBL;
      /* single char idents: */
      else if (foundIG)      format= kIG;
      else if (foundPearson) format= kPearson;
      else if (foundZuker)   format= kZuker;
      /* spacing ident: */
      else if (IsSELEXFormat(filename)) format= kSelex;
      /* no format chars: */
      else 
        {
          /* release the stream before bailing out; it used to be
           * leaked on this path */
          fclose(fseq);
          squid_errno = SQERR_FORMAT;
          return 0;
        }

      done= 1;
    }
  }
#undef  ReadOneLine

  /* fseq is known to be open here: a failed open returned above */
  fclose(fseq);

  *ret_format = format;
  return 1;
}