Exemple #1
0
/*
 * Given a string "str", separate the leading numeric part into
 *   num, and the alphabetic modifier that follows it into mod.
 *
 *  str     - input string. Trailing junk is removed from it first
 *            (strip_trailing_junk). On success it is rewritten in
 *            place so that it holds only the text that followed
 *            the modifier.
 *  num     - output buffer for the numeric part (digits and '.').
 *  num_len - size of the num buffer.
 *  mod     - output buffer for the modifier (run of alphabetic chars).
 *  mod_len - size of the mod buffer.
 *
 * Returns: true  if num was extracted and is_a_number() accepts it
 *          false otherwise (str is then not shifted)
 *
 * NOTE(review): the "+ 1" in the length clamps assumes bstrncpy()
 *   treats its size argument as a buffer size that includes the
 *   terminating NUL -- confirm against bstrncpy().
 */
static bool get_modifier(char *str, char *num, int num_len, char *mod, int mod_len)
{
   int i, len, num_begin, num_end, mod_begin, mod_end;

   strip_trailing_junk(str);
   len = strlen(str);

   /* Skip leading white space */
   for (i=0; i<len; i++) {
      if (!B_ISSPACE(str[i])) {
         break;
      }
   }
   num_begin = i;

   /* Walk through the numeric part (digits and decimal point) */
   for ( ; i<len; i++) {
      if (!B_ISDIGIT(str[i]) && str[i] != '.') {
         break;
      }
   }
   num_end = i;
   /* Never copy more than the number itself plus its terminator */
   if (num_len > (num_end - num_begin + 1)) {
      num_len = num_end - num_begin + 1;
   }
   /* Only true when the caller passed a zero sized buffer */
   if (num_len == 0) {
      return false;
   }
   /* Eat any spaces in front of modifier */
   for ( ; i<len; i++) {
      if (!B_ISSPACE(str[i])) {
         break;
      }
   }
   mod_begin = i;
   /* The modifier is the following run of alphabetic characters */
   for ( ; i<len; i++) {
      if (!B_ISALPHA(str[i])) {
         break;
      }
   }
   mod_end = i;
   if (mod_len > (mod_end - mod_begin + 1)) {
      mod_len = mod_end - mod_begin + 1;
   }
   Dmsg5(900, "str=%s: num_beg=%d num_end=%d mod_beg=%d mod_end=%d\n",
      str, num_begin, num_end, mod_begin, mod_end);
   bstrncpy(num, &str[num_begin], num_len);
   bstrncpy(mod, &str[mod_begin], mod_len);
   /* An empty or malformed numeric part is rejected here */
   if (!is_a_number(num)) {
      return false;
   }
   /*
    * Shift the remainder of the input (including its terminating NUL)
    *  to the front of str. Source and destination are the same buffer,
    *  so an overlap-safe move is required rather than a string copy.
    */
   memmove(str, &str[mod_end], (size_t)(len - mod_end) + 1);
   Dmsg2(900, "num=%s mod=%s\n", num, mod);

   return true;
}
Exemple #2
0
/*
 * Convert a decimal ASCII string to an unsigned 64 bit value.
 *
 * Leading white space and a single optional '+' are skipped, then
 *  digits are accumulated until the first non-digit character.
 *  A NULL pointer, or a string with no digits, yields 0.
 *  We assume ASCII input and don't worry about overflow.
 */
uint64_t str_to_uint64(const char *str)
{
   uint64_t result = 0;
   const char *cur;

   if (!str) {
      return 0;
   }
   /* Step over leading white space */
   for (cur = str; B_ISSPACE(*cur); cur++) {
   }
   /* One leading plus sign is tolerated */
   if (*cur == '+') {
      cur++;
   }
   /* Accumulate digits, most significant first */
   for ( ; B_ISDIGIT(*cur); cur++) {
      result = B_TIMES10(result) + *cur - '0';
   }
   return result;
}
Exemple #3
0
/*
 * Convert a decimal ASCII string to a signed 64 bit value.
 *
 * Leading white space and one optional sign ('+' or '-') are consumed
 *  here; the digits are then converted by str_to_uint64().
 *  A NULL pointer yields 0. No overflow checking is done.
 */
int64_t str_to_int64(const char *str)
{
   const char *p = str;
   uint64_t magnitude;
   bool negative = false;

   if (!p) {
      return 0;
   }
   while (B_ISSPACE(*p)) {
      p++;
   }
   if (*p == '+') {
      p++;
   } else if (*p == '-') {
      negative = true;
      p++;
   }
   magnitude = str_to_uint64(p);
   if (negative) {
      /*
       * Negate while the value is still unsigned. Negating an int64_t
       *  that holds the most negative value is signed overflow
       *  (undefined behavior); unsigned arithmetic simply wraps and
       *  gives the same bit pattern for every other input.
       */
      magnitude = 0 - magnitude;
   }
   return (int64_t)magnitude;
}
Exemple #4
0
/*
 *
 * Get the next token from the input
 *
 * Characters are read one at a time with lex_get_char() and fed
 *  through the state machine selected by lf->state until a token
 *  is recognized (token != T_NONE). The token text is collected
 *  with begin_str()/add_str().
 *
 *  lf     - lexer context. Note that lf is reassigned locally when
 *           an '@' include opens a new file (see lex_include).
 *  expect - what the caller expects next. T_SKIP_EOL suppresses
 *           T_EOL tokens; the numeric/name/string expectations
 *           trigger the type checking done in the second switch.
 *
 * Returns: the token code, which is also stored in lf->token.
 *          T_ERROR is returned on EOF in the middle of a token,
 *          on a bad BOM, on a failed include, or when the token
 *          does not satisfy "expect".
 *
 */
int lex_get_token(LEX *lf, int expect)
{
   int ch;
   int token = T_NONE;
   bool esc_next = false;           /* previous char was a backslash */
   /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
      to indicate which transmission format the file is in. The codepoint for
      this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
      and as FF-FE in UTF-16le(little endian) and  FE-FF in UTF-16(big endian).
      We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
      to tell which byte we are expecting. */
   int bom_bytes_seen = 0;

   Dmsg0(dbglvl, "enter lex_get_token\n");
   while (token == T_NONE) {
      ch = lex_get_char(lf);
      switch (lf->state) {
      /* Not inside any token: decide what kind of token ch starts */
      case lex_none:
         Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
         if (B_ISSPACE(ch))
            break;
         if (B_ISALPHA(ch)) {
            /* Options can force a letter to start a string rather than an identifier */
            if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
               lf->state = lex_string;
            } else {
               lf->state = lex_identifier;
            }
            begin_str(lf, ch);
            break;
         }
         if (B_ISDIGIT(ch)) {
            if (lf->options & LOPT_STRING) {
               lf->state = lex_string;
            } else {
               lf->state = lex_number;
            }
            begin_str(lf, ch);
            break;
         }
         Dmsg0(dbglvl, "Enter lex_none switch\n");
         /* Neither space, letter nor digit: punctuation and specials */
         switch (ch) {
         case L_EOF:
            token = T_EOF;
            Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
            break;
         case '#':
            lf->state = lex_comment;
            break;
         case '{':
            token = T_BOB;
            begin_str(lf, ch);
            break;
         case '}':
            token = T_EOB;
            begin_str(lf, ch);
            break;
         case '"':
            /* The opening quote itself is not stored in the string */
            lf->state = lex_quoted_string;
            begin_str(lf, 0);
            break;
         case '=':
            token = T_EQUALS;
            begin_str(lf, ch);
            break;
         case ',':
            token = T_COMMA;
            begin_str(lf, ch);
            break;
         case ';':
            if (expect != T_SKIP_EOL) {
               token = T_EOL;      /* treat ; like EOL */
            }
            break;
         case L_EOL:
            Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
            if (expect != T_SKIP_EOL) {
               token = T_EOL;
            }
            break;
         case '@':
            /* In NO_EXTERN mode, @ is part of a string */
            if (lf->options & LOPT_NO_EXTERN) {
               lf->state = lex_string;
               begin_str(lf, ch);
            } else {
               /* Otherwise @ introduces an include; the '@' is not kept */
               lf->state = lex_include;
               begin_str(lf, 0);
            }
            break;
         case 0xEF: /* probably a UTF-8 BOM */
         case 0xFF: /* probably a UTF-16le BOM */
         case 0xFE: /* probably a UTF-16be BOM (error)*/
            /* Only treated as a BOM at line 1, column 1; elsewhere it starts a string */
            if (lf->line_no != 1 || lf->col_no != 1)
            {
               lf->state = lex_string;
               begin_str(lf, ch);
            } else {
               bom_bytes_seen = 1;
               if (ch == 0xEF) {
                  lf->state = lex_utf8_bom;
               } else if (ch == 0xFF) {
                  lf->state = lex_utf16_le_bom;
               } else {
                  /* This return bypasses the lf->token assignment below */
                  scan_err0(lf, _("This config file appears to be in an "
                     "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
                  return T_ERROR;
               }
            }
            break;
         default:
            /* Any other character starts an unquoted string */
            lf->state = lex_string;
            begin_str(lf, ch);
            break;
         }
         break;
      /* Inside a '#' comment: discard everything up to end of line */
      case lex_comment:
         Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
         if (ch == L_EOL) {
            lf->state = lex_none;
            if (expect != T_SKIP_EOL) {
               token = T_EOL;
            }
         } else if (ch == L_EOF) {
            token = T_ERROR;
         }
         break;
      /* Inside a run of digits */
      case lex_number:
         Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         /* Might want to allow trailing specifications here */
         if (B_ISDIGIT(ch)) {
            add_str(lf, ch);
            break;
         }

         /* A valid number can be terminated by the following */
         if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
            token = T_NUMBER;
            lf->state = lex_none;
         } else {
            /* Anything else turns what was scanned so far into a string */
            lf->state = lex_string;
         }
         /* In both cases ch is pushed back to be rescanned in the new state */
         lex_unget_char(lf);
         break;
      /*
       * NOTE(review): nothing in this function sets lex_ip_addr, and
       *  the state consumes characters without producing a token
       *  (except T_ERROR on EOF) -- confirm whether it is still used.
       */
      case lex_ip_addr:
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
         break;
      /* Inside an unquoted string: ends at white space or a delimiter */
      case lex_string:
         Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
             ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
            /* The terminator is not part of the string; rescan it next time */
            lex_unget_char(lf);
            token = T_UNQUOTED_STRING;
            lf->state = lex_none;
            break;
         }
         add_str(lf, ch);
         break;
      /* Inside an identifier (started by a letter) */
      case lex_identifier:
         Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
         if (B_ISALPHA(ch)) {
            add_str(lf, ch);
            break;
         } else if (B_ISSPACE(ch)) {
            /* White space does not end the identifier; it is dropped */
            break;
         } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
                    ch == '\r' || ch == ';' || ch == ','   || ch == '"' || ch == '#') {
            lex_unget_char(lf);
            token = T_IDENTIFIER;
            lf->state = lex_none;
            break;
         } else if (ch == L_EOF) {
            token = T_ERROR;
            lf->state = lex_none;
            begin_str(lf, ch);
            break;
         }
         /* Some non-alpha character => string */
         lf->state = lex_string;
         add_str(lf, ch);
         break;
      /* Inside a double quoted string */
      case lex_quoted_string:
         Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         if (ch == L_EOL) {
            /* End of line is not stored and cancels a pending escape */
            esc_next = false;
            break;
         }
         if (esc_next) {
            /* Character after a backslash is taken literally */
            add_str(lf, ch);
            esc_next = false;
            break;
         }
         if (ch == '\\') {
            esc_next = true;
            break;
         }
         if (ch == '"') {
            token = T_QUOTED_STRING;
            /*
             * Since we may be scanning a quoted list of names,
             *  we get the next character (a comma indicates another
             *  one), then we put it back for rescanning.
             */
            lex_get_char(lf);
            lex_unget_char(lf);
            lf->state = lex_none;
            break;
         }
         add_str(lf, ch);
         break;
      /* Inside a quoted filename following '@' */
      case lex_include_quoted_string:
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         if (esc_next) {
            add_str(lf, ch);
            esc_next = false;
            break;
         }
         if (ch == '\\') {
            esc_next = true;
            break;
         }
         if (ch == '"') {
            /* Keep the original LEX so we can print an error if the included file can't be opened. */
            LEX* lfori = lf;
            /* Skip the double quote when restarting parsing */
            lex_get_char(lf);

            lf->state = lex_none;
            /*
             * From here on lf refers to whatever lex_open_file() returned.
             *  NOTE(review): lf is a by-value parameter, so how the caller
             *  picks up the new LEX is not visible here -- confirm.
             */
            lf = lex_open_file(lf, lf->str, lf->scan_error, lf->scan_warning);
            if (lf == NULL) {
               berrno be;
               scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
                  lfori->str, be.bstrerror());
               return T_ERROR;
            }
            break;
         }
         add_str(lf, ch);
         break;
      case lex_include:            /* scanning a filename */
         if (ch == L_EOF) {
            token = T_ERROR;
            break;
         }
         if (ch == '"') {
            /* Filename is quoted: switch to the quoted variant */
            lf->state = lex_include_quoted_string;
            break;
         }


         /* The ch == '"' test below cannot be true: it was handled just above */
         if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
             ch == ';' || ch == ','   || ch == '"' || ch == '#') {
            /* Keep the original LEX so we can print an error if the included file can't be opened. */
            LEX* lfori = lf;

            lf->state = lex_none;
            /* Filename complete: continue scanning in the LEX returned by lex_open_file() */
            lf = lex_open_file(lf, lf->str, lf->scan_error, lf->scan_warning);
            if (lf == NULL) {
               berrno be;
               scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
                  lfori->str, be.bstrerror());
               return T_ERROR;
            }
            break;
         }
         add_str(lf, ch);
         break;
      case lex_utf8_bom:
         /* we only end up in this state if we have read an 0xEF
            as the first byte of the file, indicating we are probably
            reading a UTF-8 file */
         if (ch == 0xBB && bom_bytes_seen == 1) {
            bom_bytes_seen++;
         } else if (ch == 0xBF && bom_bytes_seen == 2) {
            token = T_UTF8_BOM;
            lf->state = lex_none;
         } else {
            /* Any other byte sequence is not a valid UTF-8 BOM */
            token = T_ERROR;
         }
         break;
      case lex_utf16_le_bom:
         /* we only end up in this state if we have read an 0xFF
            as the first byte of the file -- indicating that we are
            probably dealing with an Intel based (little endian) UTF-16 file*/
         if (ch == 0xFE) {
            token = T_UTF16_BOM;
            lf->state = lex_none;
         } else {
            token = T_ERROR;
         }
         break;
      }
      Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
        lex_tok_to_str(token), ch);
   }
   Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
   lf->token = token;

   /*
    * Here is where we check to see if the user has set certain
    *  expectations (e.g. 32 bit integer). If so, we do type checking
    *  and possible additional scanning (e.g. for range).
    */
   switch (expect) {
   case T_PINT16:
      /* Only the low 16 bits of the scanned value are kept */
      lf->u.pint16_val = (scan_pint(lf, lf->str) & 0xffff);
      lf->u2.pint16_val = lf->u.pint16_val;
      token = T_PINT16;
      break;

   case T_PINT32:
      lf->u.pint32_val = scan_pint(lf, lf->str);
      lf->u2.pint32_val = lf->u.pint32_val;
      token = T_PINT32;
      break;

   case T_PINT32_RANGE:
      if (token == T_NUMBER) {
         /* A single number: both ends of the range get the same value */
         lf->u.pint32_val = scan_pint(lf, lf->str);
         lf->u2.pint32_val = lf->u.pint32_val;
         token = T_PINT32;
      } else {
         /* Otherwise expect "low-high": split lf->str at the first '-' */
         char *p = strchr(lf->str, '-');
         if (!p) {
            scan_err2(lf, _("expected an integer or a range, got %s: %s"),
               lex_tok_to_str(token), lf->str);
            token = T_ERROR;
            break;
         }
         *p++ = 0;                       /* terminate first half of range */
         lf->u.pint32_val  = scan_pint(lf, lf->str);
         lf->u2.pint32_val = scan_pint(lf, p);
         token = T_PINT32_RANGE;
      }
      break;

   case T_INT16:
      if (token != T_NUMBER || !is_a_number(lf->str)) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
         break;
      }
      /*
       * NOTE(review): errno is cleared and tested around str_to_int64();
       *  whether str_to_int64() ever sets errno is not visible here --
       *  confirm, otherwise the error branch is never taken. The same
       *  pattern is used for T_INT32 and T_INT64 below.
       */
      errno = 0;
      lf->u.int16_val = (int16_t)str_to_int64(lf->str);
      if (errno != 0) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
      } else {
         token = T_INT16;
      }
      break;

   case T_INT32:
      if (token != T_NUMBER || !is_a_number(lf->str)) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
         break;
      }
      errno = 0;
      lf->u.int32_val = (int32_t)str_to_int64(lf->str);
      if (errno != 0) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
      } else {
         token = T_INT32;
      }
      break;

   case T_INT64:
      Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
      if (token != T_NUMBER || !is_a_number(lf->str)) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
         break;
      }
      errno = 0;
      lf->u.int64_val = str_to_int64(lf->str);
      if (errno != 0) {
         scan_err2(lf, _("expected an integer number, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
      } else {
         token = T_INT64;
      }
      break;

   case T_PINT64_RANGE:
      /* Same logic as T_PINT32_RANGE, using the 64 bit scanner */
      if (token == T_NUMBER) {
         lf->u.pint64_val = scan_pint64(lf, lf->str);
         lf->u2.pint64_val = lf->u.pint64_val;
         token = T_PINT64;
      } else {
         char *p = strchr(lf->str, '-');
         if (!p) {
            scan_err2(lf, _("expected an integer or a range, got %s: %s"),
               lex_tok_to_str(token), lf->str);
            token = T_ERROR;
            break;
         }
         *p++ = 0;                       /* terminate first half of range */
         lf->u.pint64_val  = scan_pint64(lf, lf->str);
         lf->u2.pint64_val = scan_pint64(lf, p);
         token = T_PINT64_RANGE;
      }
      break;

   case T_NAME:
      /* On success the token keeps its scanned type; it is not changed to T_NAME */
      if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
         scan_err2(lf, _("expected a name, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
      } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
         scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
            lf->str_len, MAX_RES_NAME_LENGTH);
         token = T_ERROR;
      }
      break;

   case T_STRING:
      /* Identifiers and both string kinds are all reported as T_STRING */
      if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
         scan_err2(lf, _("expected a string, got %s: %s"),
               lex_tok_to_str(token), lf->str);
         token = T_ERROR;
      } else {
         token = T_STRING;
      }
      break;


   default:
      break;                          /* no expectation given */
   }
   lf->token = token;                 /* set possible new token */
   return token;
}