Exemplo n.º 1
0
// Entry point of indxbib: build an index file for the named input files.
//
// Outline:
//   1. Parse the command line (-c -d -f -h -i -k -l -n -o -t -w -v,
//      --help, --version).
//   2. Record the working directory (or the -d argument), the common
//      words file and the ignore-fields string via store_filename().
//   3. Work out the output base name and verify that the final file name
//      (base name followed by INDEX_SUFFIX) fits the limit reported by
//      file_name_max() for the target directory.
//   4. Create a temporary file next to the final index with mkstemp(),
//      skip over the space reserved for the index header, and run the
//      selected parser over every file listed in the -f file and then
//      over every remaining command-line argument.
//   5. Write the hash table, close the temporary file and move it to its
//      final name (rename(), or unlink()/link()/unlink() when
//      HAVE_RENAME is not defined).
//
// Returns 1 if any parser invocation returned 0, otherwise 0.  Most
// failures do not return at all: they are reported through fatal().
int main(int argc, char **argv)
{
  program_name = argv[0];
  static char stderr_buf[BUFSIZ];
  setbuf(stderr, stderr_buf);

  const char *base_name = 0;
  typedef int (*parser_t)(const char *);
  parser_t parser = do_file;
  const char *directory = 0;
  const char *foption = 0;
  int opt;
  static const struct option long_options[] = {
    { "help", no_argument, 0, CHAR_MAX + 1 },
    { "version", no_argument, 0, 'v' },
    { NULL, 0, 0, 0 }
  };
  // Every option letter appears exactly once in the option string.
  while ((opt = getopt_long(argc, argv, "c:o:h:i:k:l:t:n:d:f:vw",
                            long_options, NULL))
         != EOF)
    switch (opt) {
    case 'c':
      common_words_file = optarg;
      break;
    case 'd':
      directory = optarg;
      break;
    case 'f':
      foption = optarg;
      break;
    case 'h':
      check_integer_arg('h', optarg, 1, &hash_table_size);
      // Round the requested size up to the next prime and tell the user.
      if (!is_prime(hash_table_size)) {
        while (!is_prime(++hash_table_size))
          ;
        warning("%1 not prime: using %2 instead", optarg, hash_table_size);
      }
      break;
    case 'i':
      ignore_fields = optarg;
      break;
    case 'k':
      check_integer_arg('k', optarg, 1, &max_keys_per_item);
      break;
    case 'l':
      check_integer_arg('l', optarg, 0, &shortest_len);
      break;
    case 'n':
      check_integer_arg('n', optarg, 0, &n_ignore_words);
      break;
    case 'o':
      base_name = optarg;
      break;
    case 't':
      check_integer_arg('t', optarg, 1, &truncate_len);
      break;
    case 'w':
      parser = do_whole_file;
      break;
    case 'v':
      printf("GNU indxbib (groff) version %s\n", Version_string);
      exit(0);
      break;
    case CHAR_MAX + 1: // --help
      usage(stdout);
      exit(0);
      break;
    case '?':
      usage(stderr);
      exit(1);
      break;
    default:
      assert(0);
      break;
    }
  if (optind >= argc && foption == 0)
    fatal("no files and no -f option");
  if (!directory) {
    char *path = get_cwd();
    store_filename(path);
    a_delete path;
  }
  else
    store_filename(directory);
  init_hash_table();
  store_filename(common_words_file);
  store_filename(ignore_fields);
  key_buffer = new char[truncate_len];
  read_common_words_file();
  // Without -o the index is named after the first input file, or after
  // DEFAULT_INDEX_NAME when the inputs come only from -f.
  if (!base_name)
    base_name = optind < argc ? argv[optind] : DEFAULT_INDEX_NAME;
  // Find the last directory separator in base_name, trying every
  // character of DIR_SEPS and keeping the right-most match.
  const char *p = strrchr(base_name, DIR_SEPS[0]), *p1;
  const char *sep = &DIR_SEPS[1];
  while (*sep) {
    p1 = strrchr(base_name, *sep);
    if (p1 && (!p || p1 > p))
      p = p1;
    sep++;
  }
  size_t name_max;
  if (p) {
    // Query the limit for the directory part of base_name.
    char *dir = strsave(base_name);
    dir[p - base_name] = '\0';
    name_max = file_name_max(dir);
    a_delete dir;
  }
  else
    name_max = file_name_max(".");
  const char *filename = p ? p + 1 : base_name;
  // The final name is filename immediately followed by INDEX_SUFFIX (see
  // the strcat() below), so the message shows exactly that name with no
  // extra dot inserted between the two parts.
  if (strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max)
    fatal("`%1%2' is too long for a filename", filename, INDEX_SUFFIX);
  if (p) {
    // From here on p points at the first character of the file-name part.
    p++;
    // Put the temporary file in the same directory as the final index.
    temp_index_file = new char[p - base_name + sizeof(TEMP_INDEX_TEMPLATE)];
    memcpy(temp_index_file, base_name, p - base_name);
    strcpy(temp_index_file + (p - base_name), TEMP_INDEX_TEMPLATE);
  }
  else {
    temp_index_file = strsave(TEMP_INDEX_TEMPLATE);
  }
  // NOTE(review): presumably arranges for the temporary file to be removed
  // if a fatal signal arrives -- confirm against catch_fatal_signals().
  catch_fatal_signals();
  int fd = mkstemp(temp_index_file);
  if (fd < 0)
    fatal("can't create temporary index file: %1", strerror(errno));
  indxfp = fdopen(fd, FOPEN_WB);
  if (indxfp == 0)
    fatal("fdopen failed");
  // Leave room for the header at the start of the file.
  if (fseek(indxfp, sizeof(index_header), 0) < 0)
    fatal("can't seek past index header: %1", strerror(errno));
  int failed = 0;
  if (foption) {
    // "-f -" reads the list of input files from standard input.
    FILE *fp = stdin;
    if (strcmp(foption, "-") != 0) {
      errno = 0;
      fp = fopen(foption, "r");
      if (!fp)
        fatal("can't open `%1': %2", foption, strerror(errno));
    }
    // One path name per line; empty lines are skipped and NUL bytes are
    // reported and dropped.
    string path;
    int lineno = 1;
    for (;;) {
      int c;
      for (c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) {
        if (c == '\0')
          error_with_file_and_line(foption, lineno,
                                   "nul character in pathname ignored");
        else
          path += c;
      }
      if (path.length() > 0) {
        // Terminate the buffer so contents() can be used as a C string.
        path += '\0';
        if (!(*parser)(path.contents()))
          failed = 1;
        path.clear();
      }
      if (c == EOF)
        break;
      lineno++;
    }
    if (fp != stdin)
      fclose(fp);
  }
  for (int i = optind; i < argc; i++)
    if (!(*parser)(argv[i]))
      failed = 1;
  write_hash_table();
  if (fclose(indxfp) < 0)
    fatal("error closing temporary index file: %1", strerror(errno));
  char *index_file = new char[strlen(base_name) + sizeof(INDEX_SUFFIX)];
  strcpy(index_file, base_name);
  strcat(index_file, INDEX_SUFFIX);
#ifdef HAVE_RENAME
#ifdef __EMX__
  if (access(index_file, R_OK) == 0)
    unlink(index_file);
#endif /* __EMX__ */
  if (rename(temp_index_file, index_file) < 0) {
#ifdef __MSDOS__
    // RENAME could fail on plain MSDOS filesystems because
    // INDEX_FILE is an invalid filename, e.g. it has multiple dots.
    // The file-name part starts after the directory prefix when there is
    // one (p was advanced past the separator above) and at the very
    // beginning of index_file otherwise.
    char *fname = index_file + (p ? p - base_name : 0);
    char *dot = strchr(fname, '.');

    // Replace the dot with an underscore and try again.
    if (dot != 0 && strcmp(dot, INDEX_SUFFIX) != 0)
      *dot = '_';
    if (rename(temp_index_file, index_file) < 0)
#endif
    fatal("can't rename temporary index file: %1", strerror(errno));
  }
#else /* not HAVE_RENAME */
  ignore_fatal_signals();
  if (unlink(index_file) < 0) {
    if (errno != ENOENT)
      fatal("can't unlink `%1': %2", index_file, strerror(errno));
  }
  if (link(temp_index_file, index_file) < 0)
    fatal("can't link temporary index file: %1", strerror(errno));
  if (unlink(temp_index_file) < 0)
    fatal("can't unlink temporary index file: %1", strerror(errno));
#endif /* not HAVE_RENAME */
  // NOTE(review): presumably signals to cleanup code that no temporary
  // file remains to be removed -- confirm against the cleanup handler.
  temp_index_file = 0;
  return failed;
}
Exemplo n.º 2
0
/* parse_format_string
 *  parse format string and args into structured format
 *
 *  "fmt_str" [ IN ] - NUL-terminated format string. literal entries
 *  stored in the output reference this buffer directly ( pointer plus
 *  size ) instead of copying it.
 *
 *  "pd" [ IN/OUT ] - receives the parsed PrintFmt entries in "pd -> fmt".
 *  the counters "lit_size", "str_idx", "fmt_idx" and "arg_idx" are reset
 *  on entry. "fmt_idx" is only updated on the normal exit path and then
 *  counts every entry written, including the zeroed terminating entry.
 *
 *  "vargs" [ IN/OUT ] - argument list. the domain of the current argument
 *  selects the storage type of each conversion, and "vargs -> idx" is
 *  advanced once per conversion.
 *
 *  returns 0 on success. failures from create_overflow, check_integer_arg
 *  and extract_size_modifier are returned unchanged. an unknown conversion
 *  character yields rcFormat/rcUnrecognized, and a conversion that does
 *  not fit the domain of its argument yields rcParam/rcIncorrect.
 *
 *  every character handed to isdigit is first converted to unsigned char:
 *  a negative value other than EOF is undefined behavior for the
 *  <ctype.h> functions, and plain char may be signed.
 */
static
rc_t parse_format_string ( const char *fmt_str, ParseData *pd, VAList *vargs )
{
    rc_t rc;
    uint32_t i, fmt_idx;

    PrintFmt *fmt = pd -> fmt;

    /* initialize returned counters */
    pd -> lit_size = 0;
    pd -> str_idx = pd -> fmt_idx = pd -> arg_idx = 0;

    /* loop over format string */
    for ( rc = 0, i = fmt_idx = 0; fmt_str [ i ] != 0; ++ i )
    {
        uint32_t domain;
        bool alternate, numeric;
        char size_modifier, time_modifier;
        bool has_width, has_precision, has_index;

        /* loop to gather literal portions */
        uint32_t start;
        for ( start = i; ; ++ i )
        {
            /* run until we hit start of substitution token
               or until we hit end of format string */
            if ( fmt_str [ i ] != 0 && fmt_str [ i ] != '%' )
                continue;

            /* detect a literal string */
            if ( i != start )
            {
                /* expand into overflow */
                if ( fmt_idx == LOCAL_FMT_COUNT )
                {
                    rc = create_overflow ( pd, fmt_idx );
                    if ( rc != 0 )
                        return rc;

                    /* pick up the format array again after the call */
                    fmt = pd -> fmt;
                }

                /* create a text-literal format */
                memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
                fmt [ fmt_idx ] . u . l . text = & fmt_str [ start ];
                fmt [ fmt_idx ] . u . l . size = i - start;
                pd -> lit_size += i - start;
                fmt [ fmt_idx ] . fmt = spfText;
                fmt [ fmt_idx ] . type = sptLiteral;

                /* increment counter */
                ++ fmt_idx;
            }

            /* detect escape sequence */
            if ( fmt_str [ i ] == 0 || fmt_str [ i + 1 ] != '%' )
                break;

            /* skip over escape
               the second '%' becomes the first character
               of the next literal run */
            start = ++ i;
        }

        /* done when NUL byte is seen */
        if ( fmt_str [ i ] == 0 )
            break;

        /* detect overflow */
        if ( fmt_idx == LOCAL_FMT_COUNT )
        {
            rc = create_overflow ( pd, fmt_idx );
            if ( rc != 0 )
                return rc;

            fmt = pd -> fmt;
        }

        /* initial format
         *  thousands_separate    = false
         *  add_prefix            = false
         *  force_decimal_point   = false
         *  leave_trailing_zeros  = false
         *  print_time            = false
         *  print_date            = false
         *  print_weekday         = false
         *  print_timezone        = false
         *  hour_24               = false
         *  sign                  = 0
         *  left_fill             = space
         */
        memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
        fmt [ fmt_idx ] . left_fill = ' ';

        /* scan flags */
        alternate = false;
        while ( 1 )
        {
            switch ( fmt_str [ ++ i ] )
            {
                /* plus and space modify application of sign
                   to signed integer and floating point conversions.
                   plus overrides space.
                   the '+' label sits inside the "if" on purpose:
                   a space only sets the sign when none is recorded,
                   while a plus always does. */
            case ' ':
                if ( fmt [ fmt_idx ] . sign == 0 )
            case '+':
                    fmt [ fmt_idx ] . sign = fmt_str [ i ];
                continue;

                /* dash indicates left-alignment. indicate this
                   by setting "left_fill" to NUL. */
            case '-':
                fmt [ fmt_idx ] . left_fill = 0;
                continue;

                /* zero indicates an alternate left-fill for
                   numeric conversions. the zero is inserted before
                   any sign character in { '+', '-' or ' ' }.
                   since "left_fill" is also used to indicate
                   alignment, only store when right aligning. */
            case '0':
                if ( fmt [ fmt_idx ] . left_fill != 0 )
                    fmt [ fmt_idx ] . left_fill = '0';
                continue;

                /* hash indicates that the formatter should use an
                   "alternate" approach. that approach is specific
                   to the format. */
            case '#':
                alternate = true;
                continue;

                /* comma ( or apostrophe outside of US ) indicates
                   that the integer portion of a numeral should use
                   a comma as a thousands separator for legibility. */
            case ',':
            case '\'':
                fmt [ fmt_idx ] . thousands_separate = 1;
                continue;
            }

            /* we've hit a non-flag character */
            break;
        }

        /* minimum field width */
        has_width = false;
        if ( isdigit ( ( unsigned char ) fmt_str [ i ] ) )
        {
            /* literal */
            has_width = true;
            fmt [ fmt_idx ] . u . f . min_field_width = fmt_str [ i ] - '0';
            while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                fmt [ fmt_idx ] . u . f . min_field_width *= 10;
                fmt [ fmt_idx ] . u . f . min_field_width += fmt_str [ i ] - '0';
            }
        }
        else if ( fmt_str [ i ] == '*' )
        {
            /* external */
            rc = check_integer_arg ( vargs );
            if ( rc != 0 )
                return rc;

            has_width = true;
            fmt [ fmt_idx ] . ext_field_width = 1;
            ++ pd -> arg_idx;
            ++ i;
        }

        /* precision */
        has_precision = false;
        if ( fmt_str [ i ] == '.' )
        {
            /* a single dot implies a precision value of 0 */
            has_precision = true;

            if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                /* a literal precision */
                fmt [ fmt_idx ] . u . f . precision = fmt_str [ i ] - '0';
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    fmt [ fmt_idx ] . u . f . precision *= 10;
                    fmt [ fmt_idx ] . u . f . precision += fmt_str [ i ] - '0';
                }
            }
            else if ( fmt_str [ i ] == '*' )
            {
                /* external */
                rc = check_integer_arg ( vargs );
                if ( rc != 0 )
                    return rc;

                fmt [ fmt_idx ] . ext_precision = 1;
                ++ pd -> arg_idx;
                ++ i;
            }
            else if ( fmt_str [ i ] == '-' )
            {
                /* eat a negative precision - treat as 0 */
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                    ( void ) 0;
            }
        }

        /* index - given when parameter is a vector */
        has_index = false;
        if ( fmt_str [ i ] == ':' )
        {
            bool has_start, has_len, has_end, end_is_stop;
            has_start = has_len = has_end = end_is_stop = false;

            /* parameter is taken as a vector,
               with a default index starting at 0 */
            has_index = true;

            if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                /* literal index */
                fmt [ fmt_idx ] . u . f . start_idx = fmt_str [ i ] - '0';
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    fmt [ fmt_idx ] . u . f . start_idx *= 10;
                    fmt [ fmt_idx ] . u . f . start_idx += fmt_str [ i ] - '0';
                }
                has_start = true;
            }
            else switch ( fmt_str [ i ] )
            {
            case '*':
                /* external */
                rc = check_integer_arg ( vargs );
                if ( rc != 0 )
                    return rc;

                fmt [ fmt_idx ] . ext_start_index = 1;
                ++ pd -> arg_idx;
                ++ i;
                has_start = true;
                break;
            case '$':
                /* recorded as both an infinite and an external start */
                fmt [ fmt_idx ] . inf_start_index = 1;
                fmt [ fmt_idx ] . ext_start_index = 1;
                ++ pd -> arg_idx;
                ++ i;
                has_start = true;
                break;
            }

            /* detect range */
            switch ( fmt_str [ i ] )
            {
                /* given as start-stop */
            case '-':
                end_is_stop = true;
                /* fall through */

                /* given as start/len */
            case '/':

                has_len = true;

                if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    /* literal selection length or end */
                    fmt [ fmt_idx ] . u . f . select_len = fmt_str [ i ] - '0';
                    while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                    {
                        fmt [ fmt_idx ] . u . f . select_len *= 10;
                        fmt [ fmt_idx ] . u . f . select_len += fmt_str [ i ] - '0';
                    }
                    has_end = true;
                }
                else switch ( fmt_str [ i ] )
                {
                case '*':
                    /* external */
                    rc = check_integer_arg ( vargs );
                    if ( rc != 0 )
                        return rc;

                    /* external selection length or end */
                    fmt [ fmt_idx ] . ext_stop_index = end_is_stop;
                    fmt [ fmt_idx ] . ext_select_len = ! end_is_stop;
                    ++ pd -> arg_idx;
                    ++ i;
                    has_end = true;
                    break;
                case '$':
                    /* ignore index end if start is infinite */
                    if ( ! fmt [ fmt_idx ] . inf_start_index )
                    {
                        fmt [ fmt_idx ] . inf_stop_index = 1;
                        fmt [ fmt_idx ] . ext_stop_index = 1;
                        ++ pd -> arg_idx;
                        end_is_stop = has_end = true;
                    }
                    ++ i;
                    break;
                case '-':
                    /* negatives are garbage */
                    while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                        ( void ) 0;
                    break;
                default:
                    end_is_stop = false;
                }
                break;
            }

            /* a start index without a range selects a single element */
            if ( ! has_len && has_start )
                fmt [ fmt_idx ] . u . f . select_len = 1;
        }

        /* size - accept for brownie-points and for KTime */
        size_modifier = time_modifier = 0;
        switch ( fmt_str [ i ] )
        {
            /* "Tiny" modifier - like "hh" in C format */
        case 't':
            /* "Half" modifier - same as C format */
        case 'h':
            /* "Long" modifier - means 64-bit for integers, otherwise like C */
        case 'l':
            size_modifier = time_modifier = fmt_str [ i ++ ];
            break;
            /* "siZe" modifier - whatever the size of size_t is */
        case 'z':
            ++ i;
            time_modifier = 'z';
            if ( sizeof ( size_t ) == sizeof ( uint64_t ) )
                size_modifier = 'l';
            break;
        }

        /* output format
           describes the formatting to apply on output
           if precision has not been set, give it a default value */
        domain = 0;
        numeric = false;
        switch ( fmt_str [ i ] )
        {
            /* decimal signed integer */
        case 'd':
        case 'i':
            fmt [ fmt_idx ] . radix = 10;
            fmt [ fmt_idx ] . fmt = spfSignedInt;
            numeric = true;
            /* an explicit precision cancels zero fill */
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 1;
            else if ( fmt [ fmt_idx ] . left_fill == '0' )
                fmt [ fmt_idx ] . left_fill = ' ';
            domain = vtdInt;
            break;

            /* decimal unsigned integer */
        case 'u':
            fmt [ fmt_idx ] . radix = 10;
        unsigned_int:
            /* shared tail for every unsigned radix; any sign flag is dropped */
            fmt [ fmt_idx ] . fmt = spfUnsigned;
            fmt [ fmt_idx ] . sign = 0;
            numeric = true;
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 1;
            else if ( fmt [ fmt_idx ] . left_fill == '0' )
                fmt [ fmt_idx ] . left_fill = ' ';
            domain = vtdUint;
            break;

            /* hex unsigned integer */
        case 'x':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 16;
            goto unsigned_int;

            /* upper-case hex unsigned integer */
        case 'X':
            fmt [ fmt_idx ] . upper_case_num = 1;
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 16;
            goto unsigned_int;

            /* octal unsigned integer */
        case 'o':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 8;
            goto unsigned_int;

            /* binary unsigned integer */
        case 'b':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 2;
            goto unsigned_int;

            /* decimal signed floating point */
        case 'f':
            fmt [ fmt_idx ] . fmt = spfStdFloat;
        fmt_float:
            /* shared tail for every floating point conversion */
            fmt [ fmt_idx ] . radix = 10;
            fmt [ fmt_idx ] . force_decimal_point = alternate;
            numeric = true;
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 6;
            domain = vtdFloat;
            break;

            /* scientific notation floating point */
        case 'e':
            fmt [ fmt_idx ] . fmt = spfSciFloat;
            goto fmt_float;

            /* "general" floating point */
        case 'g':
            fmt [ fmt_idx ] . leave_trailing_zeros = alternate;
            fmt [ fmt_idx ] . fmt = spfGenFloat;
            goto fmt_float;

            /* character data
               's' only overwrites the precision with -1 when none was
               given, while 'c' enters inside the "if" and always does. */
        case 's':
            if ( ! has_precision )
            /* no break */
        case 'c':
                fmt [ fmt_idx ] . u . f . precision = -1;
            fmt [ fmt_idx ] . fmt = spfText;
            domain = vtdUnicode;
            break;

        default:
            return RC ( rcXF, rcString, rcFormatting, rcFormat, rcUnrecognized );
        }

        /* handle zero padding for non-numeric cases */
        if ( ! numeric && fmt [ fmt_idx ] . left_fill == '0' )
            fmt [ fmt_idx ] . left_fill = ' ';

        /* take size from actual parameter */
        rc = extract_size_modifier ( vargs, & size_modifier );
        if ( rc != 0 )
            return rc;

        /* determine type from argument
           the outer switch is on the domain of the supplied argument,
           the inner one on the domain requested by the conversion.
           "type_cast" is set when the two do not line up directly. */
        switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
        {
        case vtdBool:
        case vtdUint:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                break;
            case vtdFloat:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 't':
                fmt [ fmt_idx ] . type = sptUnsignedInt8Vect;
                break;
            case 'h':
                fmt [ fmt_idx ] . type = sptUnsignedInt16Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptUnsignedInt32Vect;
                break;
            case 'l':
                fmt [ fmt_idx ] . type = sptUnsignedInt64Vect;
                break;
            }
            break;

        case vtdInt:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                break;
            case vtdFloat:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 't':
                fmt [ fmt_idx ] . type = sptSignedInt8Vect;
                break;
            case 'h':
                fmt [ fmt_idx ] . type = sptSignedInt16Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptSignedInt32Vect;
                break;
            case 'l':
                fmt [ fmt_idx ] . type = sptSignedInt64Vect;
                break;
            }
            break;

        case vtdFloat:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            case vtdFloat:
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 'h':
                fmt [ fmt_idx ] . type = sptFloat32Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptFloat64Vect;
                break;
            }
            break;

        case vtdAscii:
            switch ( domain )
            {
            case vtdAscii:
            case vtdUnicode:
                if ( size_modifier != 0 )
                    fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            fmt [ fmt_idx ] . type = sptString;

            ++ pd -> str_idx;
            break;

        case vtdUnicode:
            switch ( domain )
            {
            case vtdAscii:
            case vtdUnicode:
                switch ( size_modifier )
                {
                case 0:
                    fmt [ fmt_idx ] . type = sptString;
                    break;
                case 'h':
                    fmt [ fmt_idx ] . type_cast = 1;
                    fmt [ fmt_idx ] . type = sptUCS2String;
                    break;
                case 'l':
                    fmt [ fmt_idx ] . type_cast = 1;
                    fmt [ fmt_idx ] . type = sptUTF32String;
                    break;
                }
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            ++ pd -> str_idx;
            break;
        }

        /* account for format argument */
        ++ fmt_idx;
        ++ pd -> arg_idx;
        ++ vargs -> idx;
    }

    /* record final fmt
       an all-zero entry terminates the list */
    if ( rc == 0 )
    {
        if ( fmt_idx == LOCAL_FMT_COUNT )
        {
            rc = create_overflow ( pd, fmt_idx );
            if ( rc != 0 )
                return rc;

            fmt = pd -> fmt;
        }

        memset ( & fmt [ fmt_idx ++ ], 0, sizeof fmt [ 0 ] );

        /* if not all arguments were consumed, should this be an error? */
        if ( vargs -> idx != vargs -> dp -> argc )
        {
            /* produce warning */
        }
    }

    pd -> fmt_idx = fmt_idx;

    return rc;
}