int main(int argc, char **argv) { program_name = argv[0]; static char stderr_buf[BUFSIZ]; setbuf(stderr, stderr_buf); const char *base_name = 0; typedef int (*parser_t)(const char *); parser_t parser = do_file; const char *directory = 0; const char *foption = 0; int opt; static const struct option long_options[] = { { "help", no_argument, 0, CHAR_MAX + 1 }, { "version", no_argument, 0, 'v' }, { NULL, 0, 0, 0 } }; while ((opt = getopt_long(argc, argv, "c:o:h:i:k:l:t:n:c:d:f:vw", long_options, NULL)) != EOF) switch (opt) { case 'c': common_words_file = optarg; break; case 'd': directory = optarg; break; case 'f': foption = optarg; break; case 'h': check_integer_arg('h', optarg, 1, &hash_table_size); if (!is_prime(hash_table_size)) { while (!is_prime(++hash_table_size)) ; warning("%1 not prime: using %2 instead", optarg, hash_table_size); } break; case 'i': ignore_fields = optarg; break; case 'k': check_integer_arg('k', optarg, 1, &max_keys_per_item); break; case 'l': check_integer_arg('l', optarg, 0, &shortest_len); break; case 'n': check_integer_arg('n', optarg, 0, &n_ignore_words); break; case 'o': base_name = optarg; break; case 't': check_integer_arg('t', optarg, 1, &truncate_len); break; case 'w': parser = do_whole_file; break; case 'v': printf("GNU indxbib (groff) version %s\n", Version_string); exit(0); break; case CHAR_MAX + 1: // --help usage(stdout); exit(0); break; case '?': usage(stderr); exit(1); break; default: assert(0); break; } if (optind >= argc && foption == 0) fatal("no files and no -f option"); if (!directory) { char *path = get_cwd(); store_filename(path); a_delete path; } else store_filename(directory); init_hash_table(); store_filename(common_words_file); store_filename(ignore_fields); key_buffer = new char[truncate_len]; read_common_words_file(); if (!base_name) base_name = optind < argc ? 
argv[optind] : DEFAULT_INDEX_NAME; const char *p = strrchr(base_name, DIR_SEPS[0]), *p1; const char *sep = &DIR_SEPS[1]; while (*sep) { p1 = strrchr(base_name, *sep); if (p1 && (!p || p1 > p)) p = p1; sep++; } size_t name_max; if (p) { char *dir = strsave(base_name); dir[p - base_name] = '\0'; name_max = file_name_max(dir); a_delete dir; } else name_max = file_name_max("."); const char *filename = p ? p + 1 : base_name; if (strlen(filename) + sizeof(INDEX_SUFFIX) - 1 > name_max) fatal("`%1.%2' is too long for a filename", filename, INDEX_SUFFIX); if (p) { p++; temp_index_file = new char[p - base_name + sizeof(TEMP_INDEX_TEMPLATE)]; memcpy(temp_index_file, base_name, p - base_name); strcpy(temp_index_file + (p - base_name), TEMP_INDEX_TEMPLATE); } else { temp_index_file = strsave(TEMP_INDEX_TEMPLATE); } catch_fatal_signals(); int fd = mkstemp(temp_index_file); if (fd < 0) fatal("can't create temporary index file: %1", strerror(errno)); indxfp = fdopen(fd, FOPEN_WB); if (indxfp == 0) fatal("fdopen failed"); if (fseek(indxfp, sizeof(index_header), 0) < 0) fatal("can't seek past index header: %1", strerror(errno)); int failed = 0; if (foption) { FILE *fp = stdin; if (strcmp(foption, "-") != 0) { errno = 0; fp = fopen(foption, "r"); if (!fp) fatal("can't open `%1': %2", foption, strerror(errno)); } string path; int lineno = 1; for (;;) { int c; for (c = getc(fp); c != '\n' && c != EOF; c = getc(fp)) { if (c == '\0') error_with_file_and_line(foption, lineno, "nul character in pathname ignored"); else path += c; } if (path.length() > 0) { path += '\0'; if (!(*parser)(path.contents())) failed = 1; path.clear(); } if (c == EOF) break; lineno++; } if (fp != stdin) fclose(fp); } for (int i = optind; i < argc; i++) if (!(*parser)(argv[i])) failed = 1; write_hash_table(); if (fclose(indxfp) < 0) fatal("error closing temporary index file: %1", strerror(errno)); char *index_file = new char[strlen(base_name) + sizeof(INDEX_SUFFIX)]; strcpy(index_file, base_name); 
strcat(index_file, INDEX_SUFFIX); #ifdef HAVE_RENAME #ifdef __EMX__ if (access(index_file, R_OK) == 0) unlink(index_file); #endif /* __EMX__ */ if (rename(temp_index_file, index_file) < 0) { #ifdef __MSDOS__ // RENAME could fail on plain MSDOS filesystems because // INDEX_FILE is an invalid filename, e.g. it has multiple dots. char *fname = p ? index_file + (p - base_name) : 0; char *dot = 0; // Replace the dot with an underscore and try again. if (fname && (dot = strchr(fname, '.')) != 0 && strcmp(dot, INDEX_SUFFIX) != 0) *dot = '_'; if (rename(temp_index_file, index_file) < 0) #endif fatal("can't rename temporary index file: %1", strerror(errno)); } #else /* not HAVE_RENAME */ ignore_fatal_signals(); if (unlink(index_file) < 0) { if (errno != ENOENT) fatal("can't unlink `%1': %2", index_file, strerror(errno)); } if (link(temp_index_file, index_file) < 0) fatal("can't link temporary index file: %1", strerror(errno)); if (unlink(temp_index_file) < 0) fatal("can't unlink temporary index file: %1", strerror(errno)); #endif /* not HAVE_RENAME */ temp_index_file = 0; return failed; }
/* parse_format_string
 *  parse format string and args into structured format
 *
 *  Walks "fmt_str" once and fills the PrintFmt array reachable through
 *  "pd -> fmt" with one record per literal run and one record per
 *  substitution token.  A "%%" pair yields a literal '%'.
 *
 *  Token grammar accepted after '%':
 *    flags      any of ' ', '+', '-', '0', '#', ',' or '\''
 *    width      decimal digits, or '*' to take it from an argument
 *    precision  '.' followed by digits, '*', or '-' digits ( ignored )
 *    index      ':' start [ ( '-' stop ) | ( '/' len ) ] where each part is
 *               digits, '*' or '$'
 *    size       't', 'h', 'l' or 'z'
 *    conversion one of d i u x X o b f e g s c
 *
 *  "fmt_str" [IN] - NUL-terminated format string; literal records point
 *   into it, so it must outlive the parsed format
 *
 *  "pd" [IN/OUT] - receives the format records and the counters
 *   lit_size, str_idx, arg_idx and fmt_idx
 *
 *  "vargs" [IN/OUT] - argument cursor; "idx" is advanced once per
 *   substitution token
 *
 *  returns 0 on success, in which case a zeroed terminator record has been
 *  appended and is included in "pd -> fmt_idx".  returns rcUnrecognized for
 *  an unknown conversion character, rcIncorrect when the conversion is
 *  incompatible with the argument's domain, or whatever non-zero rc a
 *  helper reports.  on any error return "pd -> fmt_idx" is left at 0.
 */
static
rc_t parse_format_string ( const char *fmt_str, ParseData *pd, VAList *vargs )
{
    rc_t rc;
    uint32_t i, fmt_idx;
    PrintFmt *fmt = pd -> fmt;

    /* initialize returned counters */
    pd -> lit_size = 0;
    pd -> str_idx = pd -> fmt_idx = pd -> arg_idx = 0;

    /* loop over format string */
    for ( rc = 0, i = fmt_idx = 0; fmt_str [ i ] != 0; ++ i )
    {
        uint32_t domain;
        bool alternate, numeric;
        char size_modifier, time_modifier;
        bool has_width, has_precision, has_index;

        /* loop to gather literal portions */
        uint32_t start;
        for ( start = i; ; ++ i )
        {
            /* run until we hit start of substitution token
               or until we hit end of format string */
            if ( fmt_str [ i ] != 0 && fmt_str [ i ] != '%' )
                continue;

            /* detect a literal string */
            if ( i != start )
            {
                /* expand into overflow */
                if ( fmt_idx == LOCAL_FMT_COUNT )
                {
                    rc = create_overflow ( pd, fmt_idx );
                    if ( rc != 0 )
                        return rc;

                    fmt = pd -> fmt;
                }

                /* create a text-literal format */
                memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
                fmt [ fmt_idx ] . u . l . text = & fmt_str [ start ];
                fmt [ fmt_idx ] . u . l . size = i - start;
                pd -> lit_size += i - start;
                fmt [ fmt_idx ] . fmt = spfText;
                fmt [ fmt_idx ] . type = sptLiteral;

                /* increment counter */
                ++ fmt_idx;
            }

            /* detect escape sequence */
            if ( fmt_str [ i ] == 0 || fmt_str [ i + 1 ] != '%' )
                break;

            /* skip over escape - the second '%' becomes
               the first character of the next literal */
            start = ++ i;
        }

        /* done when NUL byte is seen */
        if ( fmt_str [ i ] == 0 )
            break;

        /* detect overflow */
        if ( fmt_idx == LOCAL_FMT_COUNT )
        {
            rc = create_overflow ( pd, fmt_idx );
            if ( rc != 0 )
                return rc;

            fmt = pd -> fmt;
        }

        /* initial format
         *  thousands_separate    = false
         *  add_prefix            = false
         *  force_decimal_point   = false
         *  leave_trailing_zeros  = false
         *  print_time            = false
         *  print_date            = false
         *  print_weekday         = false
         *  print_timezone        = false
         *  hour_24               = false
         *  sign                  = 0
         *  left_fill             = space
         */
        memset ( & fmt [ fmt_idx ], 0, sizeof fmt [ 0 ] );
        fmt [ fmt_idx ] . left_fill = ' ';

        /* scan flags */
        alternate = false;
        while ( 1 )
        {
            switch ( fmt_str [ ++ i ] )
            {
                /* plus and space modify application of sign
                   to signed integer and floating point conversions.
                   plus overrides space. */
            case ' ':
                if ( fmt [ fmt_idx ] . sign == 0 )
                    fmt [ fmt_idx ] . sign = ' ';
                continue;
            case '+':
                fmt [ fmt_idx ] . sign = '+';
                continue;

                /* dash indicates left-alignment. indicate this
                   by setting "left_fill" to NUL. */
            case '-':
                fmt [ fmt_idx ] . left_fill = 0;
                continue;

                /* zero indicates an alternate left-fill for
                   numeric conversions. the zero is inserted before
                   any sign character in { '+', '-' or ' ' }.
                   since "left_fill" is also used to indicate
                   alignment, only store when right aligning. */
            case '0':
                if ( fmt [ fmt_idx ] . left_fill != 0 )
                    fmt [ fmt_idx ] . left_fill = '0';
                continue;

                /* hash indicates that the formatter should use an
                   "alternate" approach. that approach is specific
                   to the format. */
            case '#':
                alternate = true;
                continue;

                /* comma ( or apostrophe outside of US ) indicates
                   that the integer portion of a numeral should use
                   a comma as a thousands separator for legibility. */
            case ',':
            case '\'':
                fmt [ fmt_idx ] . thousands_separate = 1;
                continue;
            }

            /* we've hit a non-flag character */
            break;
        }

        /* NB - every isdigit() argument below is cast to unsigned char:
           passing a negative plain char ( any byte >= 0x80 where char
           is signed ) to a <ctype.h> function is undefined behavior */

        /* minimum field width */
        has_width = false;
        if ( isdigit ( ( unsigned char ) fmt_str [ i ] ) )
        {
            /* literal */
            has_width = true;
            fmt [ fmt_idx ] . u . f . min_field_width = fmt_str [ i ] - '0';
            while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                fmt [ fmt_idx ] . u . f . min_field_width *= 10;
                fmt [ fmt_idx ] . u . f . min_field_width += fmt_str [ i ] - '0';
            }
        }
        else if ( fmt_str [ i ] == '*' )
        {
            /* external */
            rc = check_integer_arg ( vargs );
            if ( rc != 0 )
                return rc;

            has_width = true;
            fmt [ fmt_idx ] . ext_field_width = 1;
            ++ pd -> arg_idx;
            ++ i;
        }

        /* precision */
        has_precision = false;
        if ( fmt_str [ i ] == '.' )
        {
            /* a single dot implies a precision value of 0 */
            has_precision = true;

            if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                /* a literal precision */
                fmt [ fmt_idx ] . u . f . precision = fmt_str [ i ] - '0';
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    fmt [ fmt_idx ] . u . f . precision *= 10;
                    fmt [ fmt_idx ] . u . f . precision += fmt_str [ i ] - '0';
                }
            }
            else if ( fmt_str [ i ] == '*' )
            {
                /* external */
                rc = check_integer_arg ( vargs );
                if ( rc != 0 )
                    return rc;

                fmt [ fmt_idx ] . ext_precision = 1;
                ++ pd -> arg_idx;
                ++ i;
            }
            else if ( fmt_str [ i ] == '-' )
            {
                /* eat a negative precision - treat as 0 */
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                    ( void ) 0;
            }
        }

        /* index - given when parameter is a vector */
        has_index = false;
        if ( fmt_str [ i ] == ':' )
        {
            bool has_start, has_len, has_end, end_is_stop;
            has_start = has_len = has_end = end_is_stop = false;

            /* parameter is taken as a vector,
               with a default index starting at 0 */
            has_index = true;

            if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
            {
                /* literal index */
                fmt [ fmt_idx ] . u . f . start_idx = fmt_str [ i ] - '0';
                while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    fmt [ fmt_idx ] . u . f . start_idx *= 10;
                    fmt [ fmt_idx ] . u . f . start_idx += fmt_str [ i ] - '0';
                }
                has_start = true;
            }
            else switch ( fmt_str [ i ] )
            {
            case '*':
                /* external */
                rc = check_integer_arg ( vargs );
                if ( rc != 0 )
                    return rc;

                fmt [ fmt_idx ] . ext_start_index = 1;
                ++ pd -> arg_idx;
                ++ i;
                has_start = true;
                break;
            case '$':
                /* NOTE(review): unlike '*', this advances arg_idx
                   without calling check_integer_arg - confirm intended */
                fmt [ fmt_idx ] . inf_start_index = 1;
                fmt [ fmt_idx ] . ext_start_index = 1;
                ++ pd -> arg_idx;
                ++ i;
                has_start = true;
                break;
            }

            /* detect range */
            switch ( fmt_str [ i ] )
            {
                /* given as start-stop */
            case '-':
                end_is_stop = true;
                /* no break */

                /* given as start/len */
            case '/':
                has_len = true;

                if ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                {
                    /* literal selection length or end */
                    fmt [ fmt_idx ] . u . f . select_len = fmt_str [ i ] - '0';
                    while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                    {
                        fmt [ fmt_idx ] . u . f . select_len *= 10;
                        fmt [ fmt_idx ] . u . f . select_len += fmt_str [ i ] - '0';
                    }
                    has_end = true;
                }
                else switch ( fmt_str [ i ] )
                {
                case '*':
                    /* external */
                    rc = check_integer_arg ( vargs );
                    if ( rc != 0 )
                        return rc;

                    /* external selection length or end */
                    fmt [ fmt_idx ] . ext_stop_index = end_is_stop;
                    fmt [ fmt_idx ] . ext_select_len = ! end_is_stop;
                    ++ pd -> arg_idx;
                    ++ i;
                    has_end = true;
                    break;
                case '$':
                    /* ignore index end if start is infinite */
                    if ( ! fmt [ fmt_idx ] . inf_start_index )
                    {
                        fmt [ fmt_idx ] . inf_stop_index = 1;
                        fmt [ fmt_idx ] . ext_stop_index = 1;
                        ++ pd -> arg_idx;
                        end_is_stop = has_end = true;
                    }
                    ++ i;
                    break;
                case '-':
                    /* negatives are garbage */
                    while ( isdigit ( ( unsigned char ) fmt_str [ ++ i ] ) )
                        ( void ) 0;
                    break;
                default:
                    end_is_stop = false;
                }
                break;
            }

            /* a start index with no range selects a single element */
            if ( ! has_len && has_start )
                fmt [ fmt_idx ] . u . f . select_len = 1;
        }

        /* size - accept for brownie-points and for KTime */
        size_modifier = time_modifier = 0;
        switch ( fmt_str [ i ] )
        {
            /* "Tiny" modifier - like "hh" in C format */
        case 't':
            /* "Half" modifier - same as C format */
        case 'h':
            /* "Long" modifier - means 64-bit for integers, otherwise like C */
        case 'l':
            size_modifier = time_modifier = fmt_str [ i ++ ];
            break;
            /* "siZe" modifier - whatever the size of size_t is */
        case 'z':
            ++ i;
            time_modifier = 'z';
            if ( sizeof ( size_t ) == sizeof ( uint64_t ) )
                size_modifier = 'l';
            break;
        }

        /* output format
           describes the formatting to apply on output
           if precision has not been set, give it a default value */
        domain = 0;
        numeric = false;
        switch ( fmt_str [ i ] )
        {
            /* decimal signed integer */
        case 'd':
        case 'i':
            fmt [ fmt_idx ] . radix = 10;
            fmt [ fmt_idx ] . fmt = spfSignedInt;
            numeric = true;
            /* an explicit precision cancels zero left-fill */
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 1;
            else if ( fmt [ fmt_idx ] . left_fill == '0' )
                fmt [ fmt_idx ] . left_fill = ' ';
            domain = vtdInt;
            break;

            /* decimal unsigned integer */
        case 'u':
            fmt [ fmt_idx ] . radix = 10;
        unsigned_int:
            fmt [ fmt_idx ] . fmt = spfUnsigned;
            fmt [ fmt_idx ] . sign = 0;
            numeric = true;
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 1;
            else if ( fmt [ fmt_idx ] . left_fill == '0' )
                fmt [ fmt_idx ] . left_fill = ' ';
            domain = vtdUint;
            break;

            /* hex unsigned integer */
        case 'x':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 16;
            goto unsigned_int;

            /* upper-case hex unsigned integer */
        case 'X':
            fmt [ fmt_idx ] . upper_case_num = 1;
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 16;
            goto unsigned_int;

            /* octal unsigned integer */
        case 'o':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 8;
            goto unsigned_int;

            /* binary unsigned integer */
        case 'b':
            fmt [ fmt_idx ] . add_prefix = alternate;
            fmt [ fmt_idx ] . radix = 2;
            goto unsigned_int;

            /* decimal signed floating point */
        case 'f':
            fmt [ fmt_idx ] . fmt = spfStdFloat;
        fmt_float:
            fmt [ fmt_idx ] . radix = 10;
            fmt [ fmt_idx ] . force_decimal_point = alternate;
            numeric = true;
            if ( ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = 6;
            domain = vtdFloat;
            break;

            /* scientific notation floating point */
        case 'e':
            fmt [ fmt_idx ] . fmt = spfSciFloat;
            goto fmt_float;

            /* "general" floating point */
        case 'g':
            fmt [ fmt_idx ] . leave_trailing_zeros = alternate;
            fmt [ fmt_idx ] . fmt = spfGenFloat;
            goto fmt_float;

            /* character data
               's' keeps a caller-supplied precision,
               'c' always resets precision to -1 */
        case 's':
        case 'c':
            if ( fmt_str [ i ] == 'c' || ! has_precision )
                fmt [ fmt_idx ] . u . f . precision = -1;
            fmt [ fmt_idx ] . fmt = spfText;
            domain = vtdUnicode;
            break;

        default:
            return RC ( rcXF, rcString, rcFormatting, rcFormat, rcUnrecognized );
        }

        /* handle zero padding for non-numeric cases */
        if ( ! numeric && fmt [ fmt_idx ] . left_fill == '0' )
            fmt [ fmt_idx ] . left_fill = ' ';

        /* take size from actual parameter */
        rc = extract_size_modifier ( vargs, & size_modifier );
        if ( rc != 0 )
            return rc;

        /* determine type from argument */
        switch ( vargs -> dp -> argv [ vargs -> idx ] . desc . domain )
        {
        case vtdBool:
        case vtdUint:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                break;
            case vtdFloat:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 't':
                fmt [ fmt_idx ] . type = sptUnsignedInt8Vect;
                break;
            case 'h':
                fmt [ fmt_idx ] . type = sptUnsignedInt16Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptUnsignedInt32Vect;
                break;
            case 'l':
                fmt [ fmt_idx ] . type = sptUnsignedInt64Vect;
                break;
            }
            break;

        case vtdInt:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                break;
            case vtdFloat:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 't':
                fmt [ fmt_idx ] . type = sptSignedInt8Vect;
                break;
            case 'h':
                fmt [ fmt_idx ] . type = sptSignedInt16Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptSignedInt32Vect;
                break;
            case 'l':
                fmt [ fmt_idx ] . type = sptSignedInt64Vect;
                break;
            }
            break;

        case vtdFloat:
            switch ( domain )
            {
            case vtdBool:
            case vtdUint:
            case vtdInt:
                fmt [ fmt_idx ] . type_cast = 1;
                break;
            case vtdFloat:
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            switch ( size_modifier )
            {
            case 'h':
                fmt [ fmt_idx ] . type = sptFloat32Vect;
                break;
            case 0:
                fmt [ fmt_idx ] . type = sptFloat64Vect;
                break;
            }
            break;

        case vtdAscii:
            switch ( domain )
            {
            case vtdAscii:
            case vtdUnicode:
                if ( size_modifier != 0 )
                    fmt [ fmt_idx ] . type_cast = 1;
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            fmt [ fmt_idx ] . type = sptString;
            ++ pd -> str_idx;
            break;

        case vtdUnicode:
            switch ( domain )
            {
            case vtdAscii:
            case vtdUnicode:
                switch ( size_modifier )
                {
                case 0:
                    fmt [ fmt_idx ] . type = sptString;
                    break;
                case 'h':
                    fmt [ fmt_idx ] . type_cast = 1;
                    fmt [ fmt_idx ] . type = sptUCS2String;
                    break;
                case 'l':
                    fmt [ fmt_idx ] . type_cast = 1;
                    fmt [ fmt_idx ] . type = sptUTF32String;
                    break;
                }
                break;
            default:
                return RC ( rcXF, rcString, rcFormatting, rcParam, rcIncorrect );
            }

            ++ pd -> str_idx;
            break;
        }

        /* account for format argument */
        ++ fmt_idx;
        ++ pd -> arg_idx;
        ++ vargs -> idx;
    }

    /* record final fmt */
    if ( rc == 0 )
    {
        if ( fmt_idx == LOCAL_FMT_COUNT )
        {
            rc = create_overflow ( pd, fmt_idx );
            if ( rc != 0 )
                return rc;

            fmt = pd -> fmt;
        }

        /* zeroed record terminates the format list */
        memset ( & fmt [ fmt_idx ++ ], 0, sizeof fmt [ 0 ] );

        /* if not all arguments were consumed, should this be an error? */
        if ( vargs -> idx != vargs -> dp -> argc )
        {
            /* produce warning */
        }
    }

    pd -> fmt_idx = fmt_idx;

    return rc;
}