static void main_usage_separator_options(FILE* o, char* argv0) { fprintf(o, " --rs --irs --ors Record separators, e.g. 'lf' or '\\r\\n'\n"); fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n"); fprintf(o, " --ps --ips --ops Pair separators, e.g. equals sign\n"); fprintf(o, " Notes:\n"); fprintf(o, " * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats\n"); fprintf(o, " do key-value pairs appear juxtaposed.\n"); fprintf(o, " * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;\n"); fprintf(o, " XTAB records are separated by two or more consecutive IFS/OFS -- i.e.\n"); fprintf(o, " a blank line.\n"); fprintf(o, " * OFS must be single-character for PPRINT format. This is because it is used\n"); fprintf(o, " with repetition for alignment; multi-character separators would make\n"); fprintf(o, " alignment impossible.\n"); fprintf(o, " * OPS may be multi-character for XTAB format, in which case alignment is\n"); fprintf(o, " disabled.\n"); fprintf(o, " * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle\n"); fprintf(o, " platform-native text data. In particular, this means LF line-terminators\n"); fprintf(o, " by default on Linux/OSX. You can use \"--dkvp --rs crlf\" for\n"); fprintf(o, " CRLF-terminated DKVP files, and so on.\n"); fprintf(o, " * CSV is intended to handle RFC-4180-compliant data. In particular, this means\n"); fprintf(o, " it uses CRLF line-terminators by default. You can use \"--csv --rs lf\" for\n"); fprintf(o, " Linux-native CSV files.\n"); fprintf(o, " * All RS/FS/PS options are ignored for JSON format: JSON doesn't allow\n"); fprintf(o, " changing these.\n"); fprintf(o, " * You can specify separators in any of the following ways, shown by example:\n"); fprintf(o, " - Type them out, quoting as necessary for shell escapes, e.g.\n"); fprintf(o, " \"--fs '|' --ips :\"\n"); fprintf(o, " - C-style escape sequences, e.g. 
\"--rs '\\r\\n' --fs '\\t'\".\n"); fprintf(o, " - To avoid backslashing, you can use any of the following names:\n"); fprintf(o, " "); lhmss_t* pmap = get_desc_to_chars_map(); for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { fprintf(o, " %s", pe->key); } fprintf(o, "\n"); fprintf(o, " * Default separators by format:\n"); fprintf(o, " %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS"); lhmss_t* default_rses = get_default_rses(); lhmss_t* default_fses = get_default_fses(); lhmss_t* default_pses = get_default_pses(); for (lhmsse_t* pe = default_rses->phead; pe != NULL; pe = pe->pnext) { char* filefmt = pe->key; char* rs = pe->value; char* fs = lhmss_get(default_fses, filefmt); char* ps = lhmss_get(default_pses, filefmt); fprintf(o, " %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps)); } }
// Resolves a separator given on the command line. If arg is a key in the
// name-to-characters map (e.g. crlf), the mapped value is returned as-is;
// otherwise (e.g. '\r\n') the result of mlr_unbackslash(arg) is returned.
// argv0 is not used.
static char* sep_from_arg(char* arg, char* argv0) {
	char* named_chars = lhmss_get(get_desc_to_chars_map(), arg);
	return (named_chars == NULL) ? mlr_unbackslash(arg) : named_chars;
}
// Emits one percentile value into poutrec.
//
// pvstate:          a stats1_percentile_state_t*, passed as void*.
// value_field_name: name of the input field the percentile is computed over.
// stats1_acc_name:  accumulator name of the form "p<number>", e.g. "p25";
//                   the number is handed to percentile_keeper_emit.
// copy_data:        not used by this function.
// poutrec:          record that receives the output field.
//
// The output field name is value_field_name + "_" + stats1_acc_name. It is
// built once per accumulator name and cached in pstate->poutput_field_names.
static void stats1_percentile_emit(void* pvstate, char* value_field_name, char* stats1_acc_name, int copy_data, lrec_t* poutrec) {
	stats1_percentile_state_t* pstate = pvstate;

	// Start from a defined value: sscanf leaves p untouched when the name
	// does not match "p%lf", and its return value is deliberately ignored,
	// so without the initializer a non-matching name would make the call
	// below read an indeterminate double.
	double p = 0.0;
	(void)sscanf(stats1_acc_name, "p%lf", &p); // Assuming this was range-checked earlier on to be in [0,100].
	mv_t v = percentile_keeper_emit(pstate->ppercentile_keeper, p);
	char* s = mv_alloc_format_val(&v);

	// For this type, one accumulator tracks many stats1_names, but a single value_field_name.
	char* output_field_name = lhmss_get(pstate->poutput_field_names, stats1_acc_name);
	if (output_field_name == NULL) {
		output_field_name = mlr_paste_3_strings(value_field_name, "_", stats1_acc_name);
		lhmss_put(pstate->poutput_field_names, mlr_strdup_or_die(stats1_acc_name),
			output_field_name, FREE_ENTRY_KEY|FREE_ENTRY_VALUE);
	}

	// The record gets its own copy of the key; the cached name stays in the map.
	lrec_put(poutrec, mlr_strdup_or_die(output_field_name), s, FREE_ENTRY_KEY|FREE_ENTRY_VALUE);
}
// ----------------------------------------------------------------
// Parses the main command line into a newly allocated cli_opts_t.
//
// Processing order, as visible below:
//   1. Fill in option defaults.
//   2. Consume leading arguments that start with '-' (main flags). Help,
//      usage and listing flags print and exit(0) here.
//   3. Fill in any separators and repeat-flags the user did not set, from
//      the per-format default maps keyed by ifile_fmt / ofile_fmt.
//   4. Allocate the record writer for ofile_fmt.
//   5. Parse the verb chain: one or more verbs joined by the word "then".
//   6. Treat the remaining arguments as file names and allocate the reader.
//   7. Seed the random-number generator.
//
// Returns the populated options. Returns NULL only in the case where the
// output format is PPRINT and OFS is not exactly one character long; the
// other error paths visible here call exit(1).
cli_opts_t* parse_command_line(int argc, char** argv) {
	cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t));
	memset(popts, 0, sizeof(*popts));

	// Input-side settings. NULL / NEITHER_TRUE_NOR_FALSE mean "not set on the
	// command line"; they are replaced by per-format defaults after flag parsing.
	popts->irs = NULL;
	popts->ifs = NULL;
	popts->ips = NULL;
	popts->allow_repeat_ifs = NEITHER_TRUE_NOR_FALSE;
	popts->allow_repeat_ips = NEITHER_TRUE_NOR_FALSE;
	popts->use_implicit_csv_header = FALSE;
	popts->headerless_csv_output = FALSE;

	// Output-side separators, likewise defaulted later if still NULL.
	popts->ors = NULL;
	popts->ofs = NULL;
	popts->ops = NULL;

	popts->right_justify_xtab_value = FALSE;
	popts->stack_json_output_vertically = FALSE;
	popts->wrap_json_output_in_outer_list = FALSE;
	popts->quote_json_values_always = FALSE;
	popts->json_flatten_separator = DEFAULT_JSON_FLATTEN_SEPARATOR;

	popts->ofmt = DEFAULT_OFMT;
	popts->oquoting = DEFAULT_OQUOTING;

	popts->plrec_reader = NULL;
	popts->plrec_writer = NULL;
	popts->prepipe = NULL;
	popts->filenames = NULL;

	popts->ifile_fmt = "dkvp";
	popts->ofile_fmt = "dkvp";

	popts->use_mmap_for_read = TRUE;
	int left_align_pprint = TRUE;

	int have_rand_seed = FALSE;
	unsigned rand_seed = 0;

	// Main-flag loop: stops at the first argument not starting with '-',
	// which is taken to be the first verb.
	int argi = 1;
	for (; argi < argc; argi++) {

		if (argv[argi][0] != '-') {
			break;

		} else if (streq(argv[argi], "--version")) {
#ifdef HAVE_CONFIG_H
			printf("Miller %s\n", PACKAGE_VERSION);
#else
			printf("Miller %s\n", MLR_VERSION);
#endif // HAVE_CONFIG_H
			exit(0);
		} else if (streq(argv[argi], "-h")) {
			main_usage(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--help")) {
			main_usage(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--help-all-verbs")) {
			// NOTE(review): no exit() here, unlike the neighboring help flags;
			// presumably usage_all_verbs exits itself -- confirm.
			usage_all_verbs(argv[0]);
		} else if (streq(argv[argi], "--list-all-verbs") || streq(argv[argi], "-l")) {
			list_all_verbs(stdout, "");
			exit(0);
		} else if (streq(argv[argi], "--list-all-verbs-raw")) {
			list_all_verbs_raw(stdout);
			exit(0);
		} else if (streq(argv[argi], "--list-all-functions-raw")) {
			lrec_evaluator_list_all_functions_raw(stdout);
			exit(0);
		} else if (streq(argv[argi], "--help-all-functions") || streq(argv[argi], "-f")) {
			lrec_evaluator_function_usage(stdout, NULL);
			exit(0);
		} else if (streq(argv[argi], "--help-function") || streq(argv[argi], "--hf")) {
			check_arg_count(argv, argi, argc, 2);
			lrec_evaluator_function_usage(stdout, argv[argi+1]);
			exit(0);

		// main-usage subsections, individually accessible for the benefit of
		// the manpage-autogenerator
		} else if (streq(argv[argi], "--usage-synopsis")) {
			main_usage_synopsis(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-examples")) {
			main_usage_examples(stdout, argv[0], "");
			exit(0);
		} else if (streq(argv[argi], "--usage-list-all-verbs")) {
			list_all_verbs(stdout, "");
			exit(0);
		} else if (streq(argv[argi], "--usage-help-options")) {
			main_usage_help_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-functions")) {
			main_usage_functions(stdout, argv[0], "");
			exit(0);
		} else if (streq(argv[argi], "--usage-data-format-examples")) {
			main_usage_data_format_examples(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-data-format-options")) {
			main_usage_data_format_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-compressed-data-options")) {
			main_usage_compressed_data_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-separator-options")) {
			main_usage_separator_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-csv-options")) {
			main_usage_csv_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-double-quoting")) {
			main_usage_double_quoting(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-numerical-formatting")) {
			main_usage_numerical_formatting(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-other-options")) {
			main_usage_other_options(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-then-chaining")) {
			main_usage_then_chaining(stdout, argv[0]);
			exit(0);
		} else if (streq(argv[argi], "--usage-see-also")) {
			main_usage_see_also(stdout, argv[0]);
			exit(0);

		// Separator flags. The combined forms (--rs, --fs, --ps) set both the
		// input and the output separator, each from its own sep_from_arg call.
		// Flags taking a value advance argi one extra step past that value.
		} else if (streq(argv[argi], "--rs")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ors = sep_from_arg(argv[argi+1], argv[0]);
			popts->irs = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--irs")) {
			check_arg_count(argv, argi, argc, 2);
			popts->irs = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--ors")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ors = sep_from_arg(argv[argi+1], argv[0]);
			argi++;

		} else if (streq(argv[argi], "--fs")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ofs = sep_from_arg(argv[argi+1], argv[0]);
			popts->ifs = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--ifs")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ifs = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--ofs")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ofs = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--repifs")) {
			popts->allow_repeat_ifs = TRUE;

		} else if (streq(argv[argi], "--implicit-csv-header")) {
			popts->use_implicit_csv_header = TRUE;

		} else if (streq(argv[argi], "--headerless-csv-output")) {
			popts->headerless_csv_output = TRUE;

		} else if (streq(argv[argi], "-p")) {
			// Shorthand: NIDX format in and out, space-separated, repeated IFS allowed.
			popts->ifile_fmt = "nidx";
			popts->ofile_fmt = "nidx";
			popts->ifs = " ";
			popts->ofs = " ";
			popts->allow_repeat_ifs = TRUE;

		} else if (streq(argv[argi], "--ps")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ops = sep_from_arg(argv[argi+1], argv[0]);
			popts->ips = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--ips")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ips = sep_from_arg(argv[argi+1], argv[0]);
			argi++;
		} else if (streq(argv[argi], "--ops")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ops = sep_from_arg(argv[argi+1], argv[0]);
			argi++;

		} else if (streq(argv[argi], "--xvright")) {
			popts->right_justify_xtab_value = TRUE;

		} else if (streq(argv[argi], "--jvstack")) {
			popts->stack_json_output_vertically = TRUE;
		} else if (streq(argv[argi], "--jlistwrap")) {
			popts->wrap_json_output_in_outer_list = TRUE;
		} else if (streq(argv[argi], "--jquoteall")) {
			popts->quote_json_values_always = TRUE;
		} else if (streq(argv[argi], "--jflatsep")) {
			check_arg_count(argv, argi, argc, 2);
			popts->json_flatten_separator = sep_from_arg(argv[argi+1], argv[0]);
			argi++;

		// File-format flags: "--X" sets both sides, "--iX" input only,
		// "--oX" output only.
		} else if (streq(argv[argi], "--csv")) { popts->ifile_fmt = popts->ofile_fmt = "csv";
		} else if (streq(argv[argi], "--icsv")) { popts->ifile_fmt = "csv";
		} else if (streq(argv[argi], "--ocsv")) { popts->ofile_fmt = "csv";

		} else if (streq(argv[argi], "--csvlite")) { popts->ifile_fmt = popts->ofile_fmt = "csvlite";
		} else if (streq(argv[argi], "--icsvlite")) { popts->ifile_fmt = "csvlite";
		} else if (streq(argv[argi], "--ocsvlite")) { popts->ofile_fmt = "csvlite";

		} else if (streq(argv[argi], "--dkvp")) { popts->ifile_fmt = popts->ofile_fmt = "dkvp";
		} else if (streq(argv[argi], "--idkvp")) { popts->ifile_fmt = "dkvp";
		} else if (streq(argv[argi], "--odkvp")) { popts->ofile_fmt = "dkvp";

		} else if (streq(argv[argi], "--json")) { popts->ifile_fmt = popts->ofile_fmt = "json";
		} else if (streq(argv[argi], "--ijson")) { popts->ifile_fmt = "json";
		} else if (streq(argv[argi], "--ojson")) { popts->ofile_fmt = "json";

		} else if (streq(argv[argi], "--nidx")) { popts->ifile_fmt = popts->ofile_fmt = "nidx";
		} else if (streq(argv[argi], "--inidx")) { popts->ifile_fmt = "nidx";
		} else if (streq(argv[argi], "--onidx")) { popts->ofile_fmt = "nidx";

		} else if (streq(argv[argi], "--xtab")) { popts->ifile_fmt = popts->ofile_fmt = "xtab";
		} else if (streq(argv[argi], "--ixtab")) { popts->ifile_fmt = "xtab";
		} else if (streq(argv[argi], "--oxtab")) { popts->ofile_fmt = "xtab";

		// PPRINT input is read as CSVLITE with a space IFS and repeated IFS allowed;
		// only the output side uses the "pprint" format name.
		} else if (streq(argv[argi], "--ipprint")) {
			popts->ifile_fmt = "csvlite";
			popts->ifs = " ";
			popts->allow_repeat_ifs = TRUE;

		} else if (streq(argv[argi], "--opprint")) {
			popts->ofile_fmt = "pprint";
		} else if (streq(argv[argi], "--pprint")) {
			popts->ifile_fmt = "csvlite";
			popts->ifs = " ";
			popts->allow_repeat_ifs = TRUE;
			popts->ofile_fmt = "pprint";
		} else if (streq(argv[argi], "--right")) {
			left_align_pprint = FALSE;

		} else if (streq(argv[argi], "--ofmt")) {
			check_arg_count(argv, argi, argc, 2);
			popts->ofmt = argv[argi+1];
			argi++;

		} else if (streq(argv[argi], "--quote-all")) { popts->oquoting = QUOTE_ALL;
		} else if (streq(argv[argi], "--quote-none")) { popts->oquoting = QUOTE_NONE;
		} else if (streq(argv[argi], "--quote-minimal")) { popts->oquoting = QUOTE_MINIMAL;
		} else if (streq(argv[argi], "--quote-numeric")) { popts->oquoting = QUOTE_NUMERIC;

		} else if (streq(argv[argi], "--mmap")) {
			popts->use_mmap_for_read = TRUE;
		} else if (streq(argv[argi], "--no-mmap")) {
			popts->use_mmap_for_read = FALSE;

		} else if (streq(argv[argi], "--seed")) {
			check_arg_count(argv, argi, argc, 2);
			// Try the 0x-prefixed hexadecimal form first, then unsigned decimal.
			if (sscanf(argv[argi+1], "0x%x", &rand_seed) == 1) {
				have_rand_seed = TRUE;
			} else if (sscanf(argv[argi+1], "%u", &rand_seed) == 1) {
				have_rand_seed = TRUE;
			} else {
				main_usage(stderr, argv[0]);
				exit(1);
			}
			argi++;

		} else if (streq(argv[argi], "--prepipe")) {
			check_arg_count(argv, argi, argc, 2);
			popts->prepipe = argv[argi+1];
			// Selecting a prepipe also turns off mmap reads.
			popts->use_mmap_for_read = FALSE;
			argi++;

		} else {
			// NOTE(review): control would fall through to the next loop iteration
			// unless usage_unrecognized_verb exits -- presumably it does; confirm.
			usage_unrecognized_verb(argv[0], argv[argi]);
		}
	}

	// Anything the user left unset is looked up by format name in the default maps.
	lhmss_t* default_rses = get_default_rses();
	lhmss_t* default_fses = get_default_fses();
	lhmss_t* default_pses = get_default_pses();
	lhmsi_t* default_repeat_ifses = get_default_repeat_ifses();
	lhmsi_t* default_repeat_ipses = get_default_repeat_ipses();

	if (popts->irs == NULL)
		popts->irs = lhmss_get(default_rses, popts->ifile_fmt);
	if (popts->ifs == NULL)
		popts->ifs = lhmss_get(default_fses, popts->ifile_fmt);
	if (popts->ips == NULL)
		popts->ips = lhmss_get(default_pses, popts->ifile_fmt);

	if (popts->allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE)
		popts->allow_repeat_ifs = lhmsi_get(default_repeat_ifses, popts->ifile_fmt);
	if (popts->allow_repeat_ips == NEITHER_TRUE_NOR_FALSE)
		popts->allow_repeat_ips = lhmsi_get(default_repeat_ipses, popts->ifile_fmt);

	if (popts->ors == NULL)
		popts->ors = lhmss_get(default_rses, popts->ofile_fmt);
	if (popts->ofs == NULL)
		popts->ofs = lhmss_get(default_fses, popts->ofile_fmt);
	if (popts->ops == NULL)
		popts->ops = lhmss_get(default_pses, popts->ofile_fmt);

	// A value still unset after the lookup means a default map has no entry
	// for the chosen format name; this is reported as an internal error.
	if (popts->irs == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}
	if (popts->ifs == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}
	if (popts->ips == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}

	if (popts->allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}
	if (popts->allow_repeat_ips == NEITHER_TRUE_NOR_FALSE) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}

	if (popts->ors == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}
	if (popts->ofs == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}
	if (popts->ops == NULL) {
		fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__);
		exit(1);
	}

	// The PPRINT writer below is given only popts->ofs[0], so OFS must be one character.
	// NOTE(review): this is the one error path that returns NULL instead of
	// exiting, and popts is not released on it -- confirm callers check for NULL.
	if (streq(popts->ofile_fmt, "pprint") && strlen(popts->ofs) != 1) {
		fprintf(stderr, "%s: OFS for PPRINT format must be single-character; got \"%s\".\n",
			argv[0], popts->ofs);
		return NULL;
	}

	// Pick the record writer matching the output format name.
	if (streq(popts->ofile_fmt, "dkvp"))
		popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops);
	else if (streq(popts->ofile_fmt, "json"))
		popts->plrec_writer = lrec_writer_json_alloc(popts->stack_json_output_vertically,
			popts->wrap_json_output_in_outer_list, popts->quote_json_values_always, popts->json_flatten_separator);
	else if (streq(popts->ofile_fmt, "csv"))
		popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting,
			popts->headerless_csv_output);
	else if (streq(popts->ofile_fmt, "csvlite"))
		popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs, popts->headerless_csv_output);
	else if (streq(popts->ofile_fmt, "nidx"))
		popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs);
	else if (streq(popts->ofile_fmt, "xtab"))
		popts->plrec_writer = lrec_writer_xtab_alloc(popts->ofs, popts->ops, popts->right_justify_xtab_value);
	else if (streq(popts->ofile_fmt, "pprint"))
		popts->plrec_writer = lrec_writer_pprint_alloc(popts->ors, popts->ofs[0], left_align_pprint);
	else {
		main_usage(stderr, argv[0]);
		exit(1);
	}

	// At least one argument (the first verb) must remain after the main flags.
	if ((argc - argi) < 1) {
		main_usage(stderr, argv[0]);
		exit(1);
	}

	// Verb chain: each verb's parse function receives &argi and is expected to
	// leave argi just past that verb's own arguments; a following "then"
	// introduces the next verb.
	popts->pmapper_list = sllv_alloc();
	while (TRUE) {
		check_arg_count(argv, argi, argc, 1);
		char* verb = argv[argi];

		mapper_setup_t* pmapper_setup = look_up_mapper_setup(verb);
		if (pmapper_setup == NULL) {
			fprintf(stderr, "%s: verb \"%s\" not found. Please use \"%s --help\" for a list.\n",
				argv[0], verb, argv[0]);
			exit(1);
		}

		// "verb -h" / "verb --help" prints that verb's usage and exits successfully.
		if ((argc - argi) >= 2) {
			if (streq(argv[argi+1], "-h") || streq(argv[argi+1], "--help")) {
				pmapper_setup->pusage_func(stdout, argv[0], verb);
				exit(0);
			}
		}

		// It's up to the parse func to print its usage on CLI-parse failure.
		mapper_t* pmapper = pmapper_setup->pparse_func(&argi, argc, argv);
		if (pmapper == NULL) {
			exit(1);
		}
		sllv_append(popts->pmapper_list, pmapper);

		if (argi >= argc || !streq(argv[argi], "then"))
			break;
		argi++;
	}

	// Everything after the verb chain is the list of file names.
	popts->filenames = &argv[argi];

	// No filenames means read from standard input, and standard input cannot be mmapped.
	if (argi == argc)
		popts->use_mmap_for_read = FALSE;

	popts->plrec_reader = lrec_reader_alloc(popts->ifile_fmt, popts->use_mmap_for_read,
		popts->irs, popts->ifs, popts->allow_repeat_ifs, popts->ips, popts->allow_repeat_ips,
		popts->use_implicit_csv_header, popts->json_flatten_separator);
	if (popts->plrec_reader == NULL) {
		main_usage(stderr, argv[0]);
		exit(1);
	}

	// Use the --seed value if one was given; otherwise the default initialization.
	if (have_rand_seed) {
		mtrand_init(rand_seed);
	} else {
		mtrand_init_default();
	}

	return popts;
}
// ---------------------------------------------------------------- static char* test_lhmss() { lhmss_t *pmap = lhmss_alloc(); mu_assert_lf(pmap->num_occupied == 0); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "x")); mu_assert_lf(lhmss_get(pmap, "x") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "x", "3"); mu_assert_lf(pmap->num_occupied == 1); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "y", "5"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "3")); mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "x", "4"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); mu_assert_lf(!lhmss_has_key(pmap, "z")); mu_assert_lf(lhmss_get(pmap, "z") == NULL); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_put(pmap, "z", "7"); mu_assert_lf(pmap->num_occupied == 3); 
mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); mu_assert_lf( lhmss_has_key(pmap, "y")); mu_assert_lf(streq(lhmss_get(pmap, "y"), "5")); mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_remove(pmap, "y"); mu_assert_lf(pmap->num_occupied == 2); mu_assert_lf(!lhmss_has_key(pmap, "w")); mu_assert_lf(lhmss_get(pmap, "w") == NULL); mu_assert_lf( lhmss_has_key(pmap, "x")); mu_assert_lf(streq(lhmss_get(pmap, "x"), "4")); mu_assert_lf(!lhmss_has_key(pmap, "y")); mu_assert_lf(lhmss_get(pmap, "y") == NULL); mu_assert_lf( lhmss_has_key(pmap, "z")); mu_assert_lf(streq(lhmss_get(pmap, "z"), "7")); mu_assert_lf(lhmss_check_counts(pmap)); lhmss_free(pmap); return NULL; }
// ---------------------------------------------------------------- cli_opts_t* parse_command_line(int argc, char** argv) { cli_opts_t* popts = mlr_malloc_or_die(sizeof(cli_opts_t)); memset(popts, 0, sizeof(*popts)); popts->irs = NULL; popts->ifs = NULL; popts->ips = NULL; popts->allow_repeat_ifs = NEITHER_TRUE_NOR_FALSE; popts->allow_repeat_ips = NEITHER_TRUE_NOR_FALSE; popts->ors = NULL; popts->ofs = NULL; popts->ops = NULL; popts->ofmt = DEFAULT_OFMT; popts->oquoting = DEFAULT_OQUOTING; popts->plrec_reader = NULL; popts->plrec_writer = NULL; popts->filenames = NULL; popts->ifile_fmt = "dkvp"; popts->ofile_fmt = "dkvp"; popts->use_mmap_for_read = TRUE; int left_align_pprint = TRUE; int have_rand_seed = FALSE; unsigned rand_seed = 0; int argi = 1; for (; argi < argc; argi++) { if (argv[argi][0] != '-') break; else if (streq(argv[argi], "--version")) { #ifdef HAVE_CONFIG_H printf("Miller version >= %s.\n", PACKAGE_VERSION); #else printf("Miller version >= %s.\n", MLR_VERSION); #endif // HAVE_CONFIG_H exit(0); } else if (streq(argv[argi], "-h")) main_usage(argv[0], 0); else if (streq(argv[argi], "--help")) main_usage(argv[0], 0); else if (streq(argv[argi], "--help-all-verbs")) usage_all_verbs(argv[0]); else if (streq(argv[argi], "--help-all-functions") || streq(argv[argi], "-f")) { lrec_evaluator_function_usage(stdout, NULL); exit(0); } else if (streq(argv[argi], "--help-function") || streq(argv[argi], "--hf")) { check_arg_count(argv, argi, argc, 2); lrec_evaluator_function_usage(stdout, argv[argi+1]); exit(0); } else if (streq(argv[argi], "--rs")) { check_arg_count(argv, argi, argc, 2); popts->ors = sep_from_arg(argv[argi+1], argv[0]); popts->irs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--irs")) { check_arg_count(argv, argi, argc, 2); popts->irs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ors")) { check_arg_count(argv, argi, argc, 2); popts->ors = sep_from_arg(argv[argi+1], argv[0]); argi++; } 
else if (streq(argv[argi], "--fs")) { check_arg_count(argv, argi, argc, 2); popts->ofs = sep_from_arg(argv[argi+1], argv[0]); popts->ifs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ifs")) { check_arg_count(argv, argi, argc, 2); popts->ifs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ofs")) { check_arg_count(argv, argi, argc, 2); popts->ofs = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--repifs")) { popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "-p")) { popts->ifile_fmt = "nidx"; popts->ofile_fmt = "nidx"; popts->ifs = " "; popts->ofs = " "; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--ps")) { check_arg_count(argv, argi, argc, 2); popts->ops = sep_from_arg(argv[argi+1], argv[0]); popts->ips = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ips")) { check_arg_count(argv, argi, argc, 2); popts->ips = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--ops")) { check_arg_count(argv, argi, argc, 2); popts->ops = sep_from_arg(argv[argi+1], argv[0]); argi++; } else if (streq(argv[argi], "--csv")) { popts->ifile_fmt = popts->ofile_fmt = "csv"; } else if (streq(argv[argi], "--icsv")) { popts->ifile_fmt = "csv"; } else if (streq(argv[argi], "--ocsv")) { popts->ofile_fmt = "csv"; } else if (streq(argv[argi], "--csvlite")) { popts->ifile_fmt = popts->ofile_fmt = "csvlite"; } else if (streq(argv[argi], "--icsvlite")) { popts->ifile_fmt = "csvlite"; } else if (streq(argv[argi], "--ocsvlite")) { popts->ofile_fmt = "csvlite"; } else if (streq(argv[argi], "--dkvp")) { popts->ifile_fmt = popts->ofile_fmt = "dkvp"; } else if (streq(argv[argi], "--idkvp")) { popts->ifile_fmt = "dkvp"; } else if (streq(argv[argi], "--odkvp")) { popts->ofile_fmt = "dkvp"; } else if (streq(argv[argi], "--nidx")) { popts->ifile_fmt = popts->ofile_fmt = "nidx"; } else if (streq(argv[argi], "--inidx")) { popts->ifile_fmt = 
"nidx"; } else if (streq(argv[argi], "--onidx")) { popts->ofile_fmt = "nidx"; } else if (streq(argv[argi], "--xtab")) { popts->ifile_fmt = popts->ofile_fmt = "xtab"; } else if (streq(argv[argi], "--ixtab")) { popts->ifile_fmt = "xtab"; } else if (streq(argv[argi], "--oxtab")) { popts->ofile_fmt = "xtab"; } else if (streq(argv[argi], "--ipprint")) { popts->ifile_fmt = "csvlite"; popts->ifs = " "; popts->allow_repeat_ifs = TRUE; } else if (streq(argv[argi], "--opprint")) { popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--pprint")) { popts->ifile_fmt = "csvlite"; popts->ifs = " "; popts->allow_repeat_ifs = TRUE; popts->ofile_fmt = "pprint"; } else if (streq(argv[argi], "--right")) { left_align_pprint = FALSE; } else if (streq(argv[argi], "--ofmt")) { check_arg_count(argv, argi, argc, 2); popts->ofile_fmt = argv[argi+1]; argi++; } else if (streq(argv[argi], "--quote-all")) { popts->oquoting = QUOTE_ALL; } else if (streq(argv[argi], "--quote-none")) { popts->oquoting = QUOTE_NONE; } else if (streq(argv[argi], "--quote-minimal")) { popts->oquoting = QUOTE_MINIMAL; } else if (streq(argv[argi], "--quote-numeric")) { popts->oquoting = QUOTE_NUMERIC; } // xxx put into online help. 
else if (streq(argv[argi], "--mmap")) { popts->use_mmap_for_read = TRUE; } else if (streq(argv[argi], "--no-mmap")) { popts->use_mmap_for_read = FALSE; } else if (streq(argv[argi], "--seed")) { check_arg_count(argv, argi, argc, 2); if (sscanf(argv[argi+1], "0x%x", &rand_seed) == 1) { have_rand_seed = TRUE; } else if (sscanf(argv[argi+1], "%u", &rand_seed) == 1) { have_rand_seed = TRUE; } else { main_usage(argv[0], 1); } argi++; } else nusage(argv[0], argv[argi]); } lhmss_t* default_rses = get_default_rses(); lhmss_t* default_fses = get_default_fses(); lhmss_t* default_pses = get_default_pses(); lhmsi_t* default_repeat_ifses = get_default_repeat_ifses(); lhmsi_t* default_repeat_ipses = get_default_repeat_ipses(); if (popts->irs == NULL) popts->irs = lhmss_get(default_rses, popts->ifile_fmt); if (popts->ifs == NULL) popts->ifs = lhmss_get(default_fses, popts->ifile_fmt); if (popts->ips == NULL) popts->ips = lhmss_get(default_pses, popts->ifile_fmt); if (popts->allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE) popts->allow_repeat_ifs = lhmsi_get(default_repeat_ifses, popts->ifile_fmt); if (popts->allow_repeat_ips == NEITHER_TRUE_NOR_FALSE) popts->allow_repeat_ips = lhmsi_get(default_repeat_ipses, popts->ifile_fmt); if (popts->ors == NULL) popts->ors = lhmss_get(default_rses, popts->ofile_fmt); if (popts->ofs == NULL) popts->ofs = lhmss_get(default_fses, popts->ofile_fmt); if (popts->ops == NULL) popts->ops = lhmss_get(default_pses, popts->ofile_fmt); if (popts->irs == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->ifs == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->ips == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->allow_repeat_ifs == NEITHER_TRUE_NOR_FALSE) { fprintf(stderr, "%s: internal coding 
error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->allow_repeat_ips == NEITHER_TRUE_NOR_FALSE) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->ors == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->ofs == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (popts->ops == NULL) { fprintf(stderr, "%s: internal coding error detected in file %s at line %d.\n", argv[0], __FILE__, __LINE__); exit(1); } if (streq(popts->ofile_fmt, "pprint") && strlen(popts->ofs) != 1) { fprintf(stderr, "%s: OFS for PPRINT format must be single-character; got \"%s\".\n", argv[0], popts->ofs); return NULL; } if (streq(popts->ofile_fmt, "dkvp")) popts->plrec_writer = lrec_writer_dkvp_alloc(popts->ors, popts->ofs, popts->ops); else if (streq(popts->ofile_fmt, "csv")) popts->plrec_writer = lrec_writer_csv_alloc(popts->ors, popts->ofs, popts->oquoting); else if (streq(popts->ofile_fmt, "csvlite")) popts->plrec_writer = lrec_writer_csvlite_alloc(popts->ors, popts->ofs); else if (streq(popts->ofile_fmt, "nidx")) popts->plrec_writer = lrec_writer_nidx_alloc(popts->ors, popts->ofs); else if (streq(popts->ofile_fmt, "xtab")) popts->plrec_writer = lrec_writer_xtab_alloc(popts->ofs, popts->ops); else if (streq(popts->ofile_fmt, "pprint")) popts->plrec_writer = lrec_writer_pprint_alloc(popts->ors, popts->ofs[0], left_align_pprint); else { main_usage(argv[0], 1); } if ((argc - argi) < 1) { main_usage(argv[0], 1); } popts->pmapper_list = sllv_alloc(); while (TRUE) { check_arg_count(argv, argi, argc, 1); char* verb = argv[argi]; mapper_setup_t* pmapper_setup = look_up_mapper_setup(verb); if (pmapper_setup == NULL) { fprintf(stderr, "%s: verb \"%s\" not found. 
Please use \"%s --help\" for a list.\n", argv[0], verb, argv[0]); exit(1); } if ((argc - argi) >= 2) { if (streq(argv[argi+1], "-h") || streq(argv[argi+1], "--help")) { pmapper_setup->pusage_func(argv[0], verb); exit(0); } } // It's up to the parse func to print its usage on CLI-parse failure. mapper_t* pmapper = pmapper_setup->pparse_func(&argi, argc, argv); if (pmapper == NULL) { exit(1); } sllv_add(popts->pmapper_list, pmapper); // xxx cmt if (argi >= argc || !streq(argv[argi], "then")) break; argi++; } popts->filenames = &argv[argi]; // No filenames means read from standard input, and standard input cannot be mmapped. if (argi == argc) popts->use_mmap_for_read = FALSE; popts->plrec_reader = lrec_reader_alloc(popts->ifile_fmt, popts->use_mmap_for_read, popts->irs, popts->ifs, popts->allow_repeat_ifs, popts->ips, popts->allow_repeat_ips); if (popts->plrec_reader == NULL) main_usage(argv[0], 1); if (have_rand_seed) { mtrand_init(rand_seed); } else { mtrand_init_default(); } return popts; }
// ---------------------------------------------------------------- // xxx cmt stdout/err & 0/1 static void main_usage(char* argv0, int exit_code) { FILE* o = exit_code == 0 ? stdout : stderr; fprintf(o, "Usage: %s [I/O options] {verb} [verb-dependent options ...] {file names}\n", argv0); fprintf(o, "Verbs:\n"); char* leader = " "; char* separator = " "; int leaderlen = strlen(leader); int separatorlen = strlen(separator); int linelen = leaderlen; int j = 0; for (int i = 0; i < mapper_lookup_table_length; i++) { char* verb = mapper_lookup_table[i]->verb; int verblen = strlen(verb); linelen += separatorlen + verblen; if (linelen >= 80) { fprintf(o, "\n"); linelen = leaderlen + separatorlen + verblen; j = 0; } if (j == 0) fprintf(o, "%s", leader); fprintf(o, "%s%s", separator, verb); j++; } fprintf(o, "\n"); fprintf(o, "Example: %s --csv --rs lf --fs tab cut -f hostname,uptime file1.csv file2.csv\n", argv0); fprintf(o, "Please use \"%s -h\" or \"%s --help\" to show this message.\n", argv0, argv0); fprintf(o, "Please use \"%s --version\" to show the software version.\n", argv0); fprintf(o, "Please use \"%s {verb name} --help\" for verb-specific help.\n", argv0); fprintf(o, "Please use \"%s --help-all-verbs\" for help on all verbs.\n", argv0); fprintf(o, "\n"); lrec_evaluator_list_functions(o); fprintf(o, "Please use \"%s --help-function {function name}\" for function-specific help.\n", argv0); fprintf(o, "Please use \"%s --help-all-functions\" or \"%s -f\" for help on all functions.\n", argv0, argv0); fprintf(o, "\n"); fprintf(o, "Data-format options, for input, output, or both:\n"); fprintf(o, " --dkvp --idkvp --odkvp Delimited key-value pairs, e.g \"a=1,b=2\"\n"); fprintf(o, " (default)\n"); fprintf(o, " --nidx --inidx --onidx Implicitly-integer-indexed fields\n"); fprintf(o, " (Unix-toolkit style)\n"); fprintf(o, " --csv --icsv --ocsv Comma-separated value (or tab-separated\n"); fprintf(o, " with --fs tab, etc.)\n"); fprintf(o, " --pprint --ipprint --opprint 
--right Pretty-printed tabular (produces no\n"); fprintf(o, " output until all input is in)\n"); fprintf(o, " --xtab --ixtab --oxtab Pretty-printed vertical-tabular\n"); fprintf(o, " -p is a keystroke-saver for --nidx --fs space --repifs\n"); fprintf(o, "Separator options, for input, output, or both:\n"); fprintf(o, " --rs --irs --ors Record separators, e.g. 'lf' or '\\r\\n'\n"); fprintf(o, " --fs --ifs --ofs --repifs Field separators, e.g. comma\n"); fprintf(o, " --ps --ips --ops Pair separators, e.g. equals sign\n"); fprintf(o, " Notes:\n"); fprintf(o, " * IPS/OPS are only used for DKVP and XTAB formats, since only in these formats\n"); fprintf(o, " do key-value pairs appear juxtaposed.\n"); fprintf(o, " * IRS/ORS are ignored for XTAB format. Nominally IFS and OFS are newlines;\n"); fprintf(o, " XTAB records are separated by two or more consecutive IFS/OFS -- i.e.\n"); fprintf(o, " a blank line.\n"); fprintf(o, " * OFS must be single-character for PPRINT format. This is because it is used\n"); fprintf(o, " with repetition for alignment; multi-character separators would make\n"); fprintf(o, " alignment impossible.\n"); fprintf(o, " * OPS may be multi-character for XTAB format, in which case alignment is\n"); fprintf(o, " disabled.\n"); fprintf(o, " * DKVP, NIDX, CSVLITE, PPRINT, and XTAB formats are intended to handle\n"); fprintf(o, " platform-native text data. In particular, this means LF line-terminators\n"); fprintf(o, " by default on Linux/OSX. You can use \"--dkvp --rs crlf\" for\n"); fprintf(o, " CRLF-terminated DKVP files, and so on.\n"); fprintf(o, " * CSV is intended to handle RFC-4180-compliant data. In particular, this means\n"); fprintf(o, " it uses CRLF line-terminators by default. 
You can use \"--csv --rs lf\" for\n"); fprintf(o, " Linux-native CSV files.\n"); fprintf(o, " * You can specify separators in any of the following ways, shown by example:\n"); fprintf(o, " - Type them out, quoting as necessary for shell escapes, e.g.\n"); fprintf(o, " \"--fs '|' --ips :\"\n"); fprintf(o, " - C-style escape sequences, e.g. \"--rs '\\r\\n' --fs '\\t'\".\n"); fprintf(o, " - To avoid backslashing, you can use any of the following names:\n"); fprintf(o, " "); lhmss_t* pmap = get_desc_to_chars_map(); for (lhmsse_t* pe = pmap->phead; pe != NULL; pe = pe->pnext) { fprintf(o, " %s", pe->key); } fprintf(o, "\n"); fprintf(o, " * Default separators by format:\n"); fprintf(o, " %-12s %-8s %-8s %s\n", "File format", "RS", "FS", "PS"); lhmss_t* default_rses = get_default_rses(); lhmss_t* default_fses = get_default_fses(); lhmss_t* default_pses = get_default_pses(); for (lhmsse_t* pe = default_rses->phead; pe != NULL; pe = pe->pnext) { char* filefmt = pe->key; char* rs = pe->value; char* fs = lhmss_get(default_fses, filefmt); char* ps = lhmss_get(default_pses, filefmt); fprintf(o, " %-12s %-8s %-8s %s\n", filefmt, rebackslash(rs), rebackslash(fs), rebackslash(ps)); } fprintf(o, "Double-quoting for CSV output:\n"); fprintf(o, " --quote-all Wrap all fields in double quotes\n"); fprintf(o, " --quote-none Do not wrap any fields in double quotes, even if they have \n"); fprintf(o, " OFS or ORS in them\n"); fprintf(o, " --quote-minimal Wrap fields in double quotes only if they have OFS or ORS\n"); fprintf(o, " in them (default)\n"); fprintf(o, " --quote-numeric Wrap fields in double quotes only if they have numbers\n"); fprintf(o, " in them\n"); fprintf(o, "Numerical formatting:\n"); fprintf(o, " --ofmt {format} E.g. %%.18lf, %%.0lf. Please use sprintf-style codes for\n"); fprintf(o, " double-precision. Applies to verbs which compute new\n"); fprintf(o, " values, e.g. put, stats1, stats2. 
See also the fmtnum\n"); fprintf(o, " function within mlr put (mlr --help-all-functions).\n"); fprintf(o, "Other options:\n"); fprintf(o, " --seed {n} with n of the form 12345678 or 0xcafefeed. For put/filter urand().\n"); fprintf(o, "Output of one verb may be chained as input to another using \"then\", e.g.\n"); fprintf(o, " %s stats1 -a min,mean,max -f flag,u,v -g color then sort -f color\n", argv0); fprintf(o, "For more information please see http://johnkerl.org/miller/doc and/or\n"); fprintf(o, "http://github.com/johnkerl/miller."); #ifdef HAVE_CONFIG_H fprintf(o, " This is Miller version >= %s.\n", PACKAGE_VERSION); #else fprintf(o, " This is Miller version >= %s.\n", MLR_VERSION); #endif // HAVE_CONFIG_H exit(exit_code); }