const int usage_train() { print("Usage: salad train [options]\n" "\n" "I/O options:\n" " -i, --input <file> The input filename.\n" " -f, --input-format <fmt> Sets the format of input. This option might be \n" " one of " IOMODES ".\n" #ifdef USE_REGEX_FILTER " --input-filter <regex> The regular expression for filtering input lines\n" " or filenames respectively.\n" #endif " --batch-size <num> Set the size of batches that are read and \n" " processed in one go (Default: %"ZU").\n" #ifdef USE_NETWORK " -p, --pcap-filter <str> Filter expression for the PCAP library in case\n" " network data is processed (Default: %s).\n" " --client-only Only consider the client-side of the network\n" " communication.\n" " --server-only Only consider the server-side of the network\n" " communication.\n" #endif " -u, --update-model In case the specified output file exists and\n" " contains a valid model this flag indicates\n" " that that model should be update rather than\n" " recreated from scratch.\n" " -o, --output <file> The output filename.\n" #ifdef USE_ARCHIVES // If there is no libarchive support we can only make use of text-based configurations. " -F, --output-format <fmt> Sets the format of output. This option might be \n" " one of " SALAD_OUTPUTFMTS ".\n" #endif "\n" "Feature options:\n" " -n, --ngram-len <num> Set length of n-grams (Default: %"ZU").\n" " -d, --ngram-delim <delim> Set delimiters for the use of word/ token n-grams.\n" " If omitted or empty byte n-grams are used.\n" " --binary Indicates to use bit n-grams rather than byte\n" " or token n-grams and consequently, disables the\n" " --ngram-delim option.\n" " -s, --filter-size <num> Set the size of the bloom filter as bits of\n" " the index (Default: %u).\n" " --hash-set <hashes> Set the hash set to be used: 'simple', 'simple2'\n" " or 'murmur' (Default: '%s').\n" "\n" "Generic options:\n" " -e, --echo-params Echo used parameters and settings.\n" " -q, --quiet Suppress all output but warning and errors.\n" " -h, --help Print this help screen.\n", /* --batch-size */ (SIZE_T) DEFAULT_CONFIG.batch_size, #ifdef USE_NETWORK /* --pcap-filter */ DEFAULT_CONFIG.pcap_filter, #endif /* --ngram-len */ (SIZE_T) DEFAULT_CONFIG.ngram_length, /* --filter-size */ DEFAULT_CONFIG.filter_size, /* --hash-set */ hashset_to_string(DEFAULT_CONFIG.hash_set)); return EXIT_SUCCESS; }
const int usage_inspect() { print("Usage: salad inspect [options]\n" "\n" "I/O options:\n" " -i, --input <file> The input filename.\n" " -f, --input-format <fmt> Sets the format of input. This option might be \n" " one of " IOMODES ".\n" #ifdef USE_REGEX_FILTER " --input-filter <regex> The regular expression for filtering input lines\n" " or filenames respectively.\n" #endif " --batch-size <num> Set the size of batches that are read and \n" " processed in one go (Default: %"ZU").\n" #ifdef USE_NETWORK " -p, --pcap-filter <str> Filter expression for the PCAP library in case\n" " network data is processed (Default: %s).\n" " --client-only Only consider the client-side of the network\n" " communication.\n" " --server-only Only consider the server-side of the network\n" " communication.\n" #endif " -b, --bloom <file> The bloom filter to be used.\n" " -o, --output <file> The output filename.\n" "\n" "Feature options:\n" " -n, --ngram-len <num> Set length of n-grams (Default: %"ZU").\n" " -d, --ngram-delim <delim> Set delimiters for the use of word/ token n-grams.\n" " If omitted or empty byte n-grams are used.\n" " --binary Indicates to use bit n-grams rather than byte\n" " or token n-grams and consequently, disables the\n" " --ngram-delim option.\n" " -s, --filter-size <num> Set the size of the bloom filter as bits of\n" " the index (Default: %u).\n" " --hash-set <hashes> Set the hash set to be used: 'simple', 'simple2'\n" " or 'murmur' (Default: '%s').\n" "\n" "Generic options:\n" " -e, --echo-params Echo used parameters and settings.\n" " -h, --help Print this help screen.\n", /* --batch-size */ (SIZE_T) DEFAULT_CONFIG.batch_size, #ifdef USE_NETWORK /* --pcap-filter */ DEFAULT_CONFIG.pcap_filter, #endif /* --ngram-len */ (SIZE_T) DEFAULT_CONFIG.ngram_length, /* --filter-size */ DEFAULT_CONFIG.filter_size, /* --hash-set */ hashset_to_string(DEFAULT_CONFIG.hash_set)); return EXIT_SUCCESS; }
const saladstate_t parse_traininglike_options_ex(int argc, char* argv[], config_t* const config, const char *shortopts, const struct option *longopts) { assert(argv != NULL); assert(config != NULL); int option, bs = FALSE, fo = FALSE; while ((option = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { switch (option) { case 'i': config->input = optarg; break; case 'f': config->input_type = as_iomode(optarg); break; case OPTION_INPUTFILTER: config->input_filter = optarg; break; case OPTION_BATCHSIZE: { int batch_size = atoi(optarg); // TODO: strtol if (batch_size <= 0) { warn("Illegal batch size specified.\n"); // This is not true in case of network data as input. Therefore, // we simply suppress this output at this point. // warn("Defaulting to: %u\n", (unsigned int) config->batch_size); } else { bs = TRUE; config->batch_size = batch_size; } break; } #ifdef USE_NETWORK case 'p': config->pcap_filter = optarg; break; #endif case 'b': config->bloom = optarg; break; case 'u': config->update_model = TRUE; break; case 'o': config->output = optarg; break; case 'n': { fo = TRUE; int ngramLength = atoi(optarg); // TODO: strtol if (ngramLength <= 0) { warn("Illegal n-gram length specified."); warn("Defaulting to: %u\n", (unsigned int) config->ngramLength); } else config->ngramLength = ngramLength; break; } case 'd': fo = TRUE; config->delimiter = optarg; break; case 's': { fo = TRUE; int filter_size = atoi(optarg); // TODO: strtol if (filter_size <= 0) { warn("Illegal filter size specified."); warn("Defaulting to: %u\n", (unsigned int) config->filter_size); } else config->filter_size = filter_size; break; } case OPTION_HASHSET: { fo = TRUE; hashset_t hashset = to_hashset(optarg); if (hashset == HASHES_UNDEFINED) { warn("Illegal hash set specified."); warn("Defaulting to: %s\n", hashset_to_string(config->hash_set)); } else config->hash_set = hashset; break; } case 'e': config->echo_params = TRUE; break; case '?': case 'h': return SALAD_HELP_TRAIN; default: // In order to catch program argument that correspond to // features that were excluded at compile time. fprintf(stderr, "invalid option -- '%c'\n", option); return SALAD_HELP_TRAIN; } } config->transfer_spec = !fo; if (check_input(config, TRUE, bs) == EXIT_FAILURE) return SALAD_EXIT; if (check_output(config) == EXIT_FAILURE) return SALAD_EXIT; if (config->echo_params) { if (config->update_model && config->transfer_spec) { // cf. salad_train_stub } else { echo_options(config); } } return SALAD_RUN; }
const saladstate_t parse_traininglike_options_ex(int argc, char* argv[], config_t* const config, const char *shortopts, const struct option *longopts) { assert(argv != NULL); assert(config != NULL); char* end; // For parsing numbers with strto* int conly = FALSE, sonly = FALSE; int option, bs = FALSE, fo = FALSE; while ((option = getopt_long(argc, argv, shortopts, longopts, NULL)) != -1) { switch (option) { case 'i': config->input = optarg; break; case 'f': config->input_type = as_inputmode(optarg); break; case OPTION_INPUTFILTER: config->input_filter = optarg; break; case OPTION_BATCHSIZE: { const long long int batch_size = strtoll(optarg, &end, 10); if (batch_size <= 0) { warn("Illegal batch size specified.\n"); // This is not true in case of network data as input. Therefore, // we simply suppress this output at this point. // warn("Defaulting to: %u\n", (unsigned int) config->batch_size); } else { bs = TRUE; config->batch_size = (size_t) MIN(SIZE_MAX, (unsigned long) MAX(0, batch_size)); } break; } #ifdef USE_NETWORK case 'p': config->pcap_filter = optarg; break; case OPTION_NETCLIENT: conly = TRUE; break; case OPTION_NETSERVER: sonly = TRUE; break; #endif case 'b': config->bloom = optarg; break; case 'u': config->update_model = TRUE; break; case 'o': config->output = optarg; break; case 'F': config->output_type = as_outputmode(optarg); break; case 'n': { fo = TRUE; const long long int ngram_length = strtoll(optarg, &end, 10); if (ngram_length <= 0) { warn("Illegal n-gram length specified."); warn("Defaulting to: %"ZU"\n", (SIZE_T) config->ngram_length); } else config->ngram_length = (size_t) MIN(SIZE_MAX, (unsigned long) ngram_length); break; } case 'd': fo = TRUE; config->delimiter = optarg; break; case OPTION_BINARY: config->binary_ngrams = TRUE; break; case 's': { fo = TRUE; const long long int filter_size = strtoll(optarg, &end, 10); if (filter_size <= 0) { warn("Illegal filter size specified."); warn("Defaulting to: %u\n", (unsigned int) config->filter_size); } else config->filter_size = (unsigned int) MIN(UINT_MAX, (unsigned long) MAX(0, filter_size)); break; } case OPTION_HASHSET: { fo = TRUE; hashset_t hashset = to_hashset(optarg); if (hashset == HASHES_UNDEFINED) { warn("Illegal hash set specified."); warn("Defaulting to: %s\n", hashset_to_string(config->hash_set)); } else config->hash_set = hashset; break; } case 'e': config->echo_params = TRUE; break; case 'q': log_level = WARNING; break; case '?': case 'h': log_level = STATUS; return SALAD_HELP_TRAIN; default: // In order to catch program argument that correspond to // features that were excluded at compile time. fprintf(stderr, "invalid option -- '%c'\n", option); return SALAD_HELP_TRAIN; } } config->transfer_spec = !fo; if (config->binary_ngrams && config->ngram_length > MASK_BITSIZE) { error("When using binary n-grams currently only a maximal"); error("length of %u bits is supported.", MASK_BITSIZE); return SALAD_EXIT; } if (check_netparams(config, conly, sonly) == EXIT_FAILURE) return SALAD_HELP_TRAIN; if (check_input(config, TRUE, bs) == EXIT_FAILURE) return SALAD_EXIT; if (check_output(config) == EXIT_FAILURE) return SALAD_EXIT; if (config->echo_params) { if (config->update_model && config->transfer_spec) { // cf. salad_train_stub } else { echo_options(config); } } return SALAD_RUN; }