Example #1
0
/** @brief qsort()-style comparator for two delimited key strings.
 *
 * The strings are compared field by field (fields are split on the
 * file-scope delimiter) with strcoll(), stopping at the first field that
 * differs.  Each field is truncated to 255 characters for the comparison.
 *
 * @param a pointer to the first key string; *a may be NULL.
 * @param b pointer to the second key string; *b may be NULL.
 *
 * @return 0 if both strings are NULL or no differing field was found before
 *         one of the strings ran out of fields; -1 if only *a is NULL; 1 if
 *         only *b is NULL; otherwise the strcoll() result of the first
 *         differing pair of fields.
 */
int key_strcmp(char **a, char **b) {
  char fa[256], fb[256];
  int retval = 0;
  int i;
  /* These must be signed.  The loop ends when get_line_field() reports a
   * negative length; with an unsigned type that test can never be true and
   * two equal keys would be compared forever. */
  int alen, blen;

  /* avoid comparing nulls */
  if (!*a && !*b)
    return 0;
  if (!*a && *b)
    return -1;
  if (*a && !*b)
    return 1;

  i = 0;
  while (retval == 0) {
    alen = (int) get_line_field(fa, *a, 255, i, delim);
    blen = (int) get_line_field(fb, *b, 255, i, delim);
    /* a negative length means field i is missing: nothing left to compare */
    if (alen < 0 || blen < 0)
      break;
    retval = strcoll(fa, fb);
    i++;
  }

  return retval;
}
Example #2
0
/* Load the filter from the filter file.
 *
 * For every remaining line of filter_reader, the key fields listed in
 * conf->aindexes are joined with the delimiter and the resulting string is
 * stored as a key in conf->filter.  Lines that yield an empty key are
 * skipped.  conf->key_buffer_sz is set to the reader's line-buffer size once
 * the whole file has been read.
 *
 * Always returns 0.
 */
static int load_filter(struct fkeys_conf *conf, dbfr_t *filter_reader) {
  char *t_keybuf;
  int i, acum_len, field_len;

  ht_init(&conf->filter, 1024, NULL, NULL);
  while (dbfr_getline(filter_reader) > 0) {

    t_keybuf = (char *) xmalloc(filter_reader->current_line_sz);
    for (acum_len = 0, i = 0; i < conf->key_count; i++) {
      field_len = get_line_field(t_keybuf + acum_len,
                                 filter_reader->current_line,
                                 filter_reader->current_line_sz - acum_len,
                                 conf->aindexes[i], delim);
      if (field_len < 0) {
        /* Missing field: treat it as empty instead of letting the negative
         * length pull the write offset backwards. */
        t_keybuf[acum_len] = '\0';
        field_len = 0;
      }
      acum_len += field_len;
      if (i != conf->key_count - 1) {
        strcat(t_keybuf + acum_len, delim);
        acum_len += delim_len;
      }
    }
    if (acum_len > 0) {
      /* NOTE(review): whether ht_put() copies the key is not visible here,
       * so the buffer is deliberately left alive once it has been stored. */
      ht_put(&conf->filter, t_keybuf, (void*)0xDEADBEEF);
    } else {
      /* nothing was stored, so nothing references the buffer */
      free(t_keybuf);
    }
  }
  conf->key_buffer_sz = filter_reader->current_line_sz;

  return 0;
}
Example #3
0
/** @brief Finds the first field of a delimited line that equals a value.
 *
 * @param value the string to look for; an empty string is accepted.
 * @param line the delimited line to search.
 * @param delim the field delimiter; when NULL or empty the whole line is
 *              treated as a single field.
 *
 * @return the 0-based index of the first matching field, -1 if no field
 *         matches (or line is NULL or empty), -2 if value is NULL.
 */
ssize_t field_str(const char *value, const char *line, const char *delim) {
  char *scratch;        /* receives one field at a time */
  int scratch_chars;    /* characters requested per field */
  int got;              /* result of get_line_field() */
  int idx;              /* field currently being inspected */
  ssize_t match = -1;   /* index of the matching field, if any */

  if (value == NULL)
    return -2;

  if (line == NULL || line[0] == '\0')
    return -1;

  /* without a delimiter the line is one big field */
  if (delim == NULL || delim[0] == '\0')
    return str_eq(value, line) ? 0 : -1;

  /* A few characters more than the value is enough to tell a match from a
     longer field; one extra byte is allocated for the terminator. */
  scratch_chars = strlen(value) + 3;
  scratch = xmalloc(scratch_chars + 1);

  for (idx = 0; ; idx++) {
    got = get_line_field(scratch, line, scratch_chars, idx, delim);
    if (!(got > -1))
      break;                    /* ran out of fields */
    if (str_eq(scratch, value)) {
      match = idx;
      break;
    }
  }

  free(scratch);
  return match;
}
Example #4
0
/* Compares the key fields of two lines.
 *
 * The fields listed in keyfields[] are extracted from each line in turn and
 * compared with strcoll(); the first pair that differs decides the result.
 *
 * Returns LEFT_RIGHT_EQUAL when both lines are NULL, LEFT_GREATER when only
 * the left one is NULL, RIGHT_GREATER when only the right one is NULL, and
 * otherwise -1, 0 or 1 according to the sign of the deciding comparison.
 */
int compare_keys(char *buffer_left, char *buffer_right) {
  char lhs[MAX_FIELD_LEN + 1];
  char rhs[MAX_FIELD_LEN + 1];
  int order = 0;
  int k = 0;

  /* Ranking a NULL line above a non-NULL one looks backwards, but it makes
     the caller print the non-NULL line and read a new one. */
  if (buffer_left == NULL)
    return buffer_right == NULL ? LEFT_RIGHT_EQUAL : LEFT_GREATER;
  if (buffer_right == NULL)
    return RIGHT_GREATER;

  while (k < nkeys && order == 0) {
    get_line_field(lhs, buffer_left, MAX_FIELD_LEN, keyfields[k], delim);
    get_line_field(rhs, buffer_right, MAX_FIELD_LEN, keyfields[k], delim);
    order = strcoll(lhs, rhs);
    k++;
  }

  /* collapse strcoll()'s result to exactly -1, 0 or 1 */
  return (order > 0) - (order < 0);
}
Example #5
0
/** @brief Joins selected fields of a line into a delimited string.
 *
 * @param line the input line.
 * @param destbuf buffer receiving the joined fields.
 * @param destbuf_sz size of destbuf in bytes.
 * @param fields array of field indexes, passed to get_line_field() as-is.
 * @param nfields number of elements in fields.
 * @param delim delimiter used both to split line and to join the output.
 *
 * Fields that get_line_field() reports as missing (negative result) are
 * written as empty strings.  If a delimiter would not fit into destbuf the
 * output is left truncated at that point.  Nothing is written when destbuf
 * is NULL, destbuf_sz is 0 or nfields is 0.
 */
void extract_fields_to_string(char *line, char *destbuf, size_t destbuf_sz,
                              int *fields, size_t nfields, char *delim) {
  char *pos;
  size_t i;
  size_t delim_len, used;
  int field_len;  /* signed: a missing field is reported as a negative value */

  if (destbuf == NULL || destbuf_sz == 0)
    return;

  delim_len = strlen(delim);
  pos = destbuf;

  for (i = 0; i < nfields; i++) {
    used = (size_t) (pos - destbuf);
    field_len =
      get_line_field(pos, line, destbuf_sz - used, fields[i], delim);
    if (field_len < 0) {
      /* keep the string terminated and do not move the cursor backwards */
      *pos = '\0';
      field_len = 0;
    }
    pos += field_len;

    if (i + 1 < nfields) {
      used = (size_t) (pos - destbuf);
      /* stop instead of writing the delimiter past the end of destbuf */
      if (used + delim_len + 1 > destbuf_sz)
        break;
      strcat(pos, delim);
      pos += delim_len;
    }
  }
}
Example #6
0
/** @brief Extracts a list of fields from a string and stores them in a target
  * buffer.
  *
  * The field separator used in the input string can be different from the
  * output field separator.  Fields that are missing from the input are
  * written as empty strings.  If an output separator would not fit into the
  * target buffer, the output is left truncated at that point.
  *
  * @param field_list an array of 0-based indexes.
  * @param n_fields the number of elements in field_list.
  * @param line the input string.
  * @param target the output string buffer.
  * @param target_sz the size of target.
  * @param ifs field separator used in line.
  * @param ofs field separator to use in target.
  */
static void extract_fields(int *field_list, size_t n_fields,
                           const char *line, char *target, size_t target_sz,
                           const char *ifs, const char *ofs) {
  size_t i;
  size_t target_len = 0,
         ofs_len = strlen(ofs);
  /* Signed on purpose: with an unsigned type the "< 0" test below is dead
   * code and a missing field would wrap target_len around. */
  int field_len;

  if (target_sz == 0)
    return;
  target[0] = '\0';

  for (i = 0; i < n_fields; i++) {
    field_len = get_line_field(target + target_len, line,
                               target_sz - target_len, field_list[i], ifs);
    if (field_len < 0) {
      /* TODO(jhinds): Maybe do something better than silently treating missing
       * fields as empty. */
      target[target_len] = '\0';
      field_len = 0;
    }
    target_len += (size_t) field_len;

    if (i + 1 < n_fields) {
      /* need room for the separator and the terminator */
      if (target_len + ofs_len >= target_sz)
        break;
      memcpy(target + target_len, ofs, ofs_len + 1);
      target_len += ofs_len;
    }
  }
}
Example #7
0
/** @brief Reads delimited input, sums the value fields for every combination
  * of key fields and pivot fields, and prints one line per distinct key with
  * a group of value columns for every distinct pivot string.
  *
  * Input comes from the files named after the options, or from stdin when
  * none are given.  When args->keep_header is set, the first line of the
  * first file is used to build the output header and the first line of every
  * later file is discarded.
  *
  * @param args contains the parsed cmd-line options & arguments.
  * @param argc number of cmd-line arguments.
  * @param argv list of cmd-line arguments
  * @param optind index of the first non-option cmd-line argument.
  *
  * @return exit status for main() to return: EXIT_FILE_ERR when no input can
  *         be opened or the header line is missing, EXIT_HELP when the field
  *         configuration is invalid, EXIT_OKAY otherwise.
  */
int pivot(struct cmdargs *args, int argc, char *argv[], int optind) {

  int i, j, tmplen;

  char default_delim[] = { 0xFE, 0x00 };

  hashtbl_t key_hash;           /* outer hash */
  hashtbl_t *pivot_hash;        /* pointer for inner hashes */

  struct pivot_conf conf;
  /* variables for keeping track of the unique pivot strings */
  hashtbl_t uniq_pivots;        /* set of all pivot strings */
  char **pivot_array;           /* list of all pivot keys */
  size_t n_key_strings;         /* number of distinct key strings */
  size_t n_pivot_keys;          /* number of distinct pivot field values */

  double *line_values;          /* array of values */

  char *keystr, *pivstr;        /* hash key strings */
  size_t keystr_sz, pivstr_sz;

  char **headers = NULL;        /* array of header labels */
  size_t n_headers = 0;         /* number of fields */

  char *fieldbuf = NULL;        /* to hold fields extracted from input */
  size_t fieldbuf_sz = 0;       /* size of field buffer */

  FILE *fin;                    /* input file */
  dbfr_t *in_reader;

  char empty_string[] = "";

  if (!args->delim) {
    args->delim = getenv("DELIMITER");
    if (!args->delim)
      args->delim = default_delim;
  }
  expand_chars(args->delim);

  delim = args->delim;

  /* get first input file pointer - either trailing arg or stdin */
  if (optind == argc)
    fin = stdin;
  else
    fin = nextfile(argc, argv, &optind, "r");
  if (!fin) {
    fprintf(stderr, "%s: no valid input files specified.\n", argv[0]);
    return EXIT_FILE_ERR;
  }
  in_reader = dbfr_init(fin);

  /* set locale with values from the environment so strcoll()
     will work correctly. */
  setlocale(LC_ALL, "");
  setlocale(LC_COLLATE, "");

  memset(&conf, 0, sizeof(conf));
  if (configure_pivot(&conf, args, in_reader->next_line, delim) != 0) {
    fprintf(stderr, "%s: error parsing input field arguments.\n", argv[0]);
    return EXIT_HELP;
  }
  if (conf.n_pivots == 0 || conf.n_values == 0) {
    fprintf(stderr, "%s: -p/-P and -v/-A must be specified.\n", argv[0]);
    return EXIT_HELP;
  }

  /* TODO: get rid of this arbirary field length limitation */
  fieldbuf = xmalloc(MAX_FIELD_LEN);
  fieldbuf_sz = MAX_FIELD_LEN;
  /* start out as a valid empty string so a failed field extraction can never
     leave uninitialised bytes to be read */
  fieldbuf[0] = '\0';

  /* extract headers from first line of input if necessary */
  if (args->keep_header) {

    if (dbfr_getline(in_reader) < 1) {
      fprintf(stderr, "%s: unexpected end of file.\n", argv[0]);
      return EXIT_FILE_ERR;
    }
    chomp(in_reader->current_line);
    n_headers = fields_in_line(in_reader->current_line, delim);
    headers = xmalloc(sizeof(char *) * n_headers);

    for (i = 0; i < n_headers; i++) {
      get_line_field(fieldbuf, in_reader->current_line, fieldbuf_sz - 1,
                     i, delim);
      /* one byte per character plus the terminator */
      headers[i] = xmalloc(strlen(fieldbuf) + 1);
      strcpy(headers[i], fieldbuf);
    }

#ifdef CRUSH_DEBUG
    for (i = 0; i < n_headers; i++) {
      fprintf(stderr, "%s%s", headers[i], i < n_headers - 1 ? args->delim : "");
    }
    fprintf(stderr, "\n");
#endif
  }

  /* these two buffers will have enough capacity to hold the entire input line,
     unless there are no key fields specified, in which case keystr will just
     be set to an empty string.
   */
  keystr = pivstr = NULL;
  keystr_sz = pivstr_sz = 0;

  ht_init(&key_hash, KEY_HASH_SZ, NULL, free_hash);
  ht_init(&uniq_pivots, PIVOT_HASH_SZ, NULL, NULL);
  n_key_strings = 0;
  n_pivot_keys = 0;

  /* no keys specified?  set keystr to an empty string */
  if (!conf.n_keys) {
    keystr = empty_string;
  }

  while (fin != NULL) {

    while (dbfr_getline(in_reader) > 0) {
      int value_in_hash = 1;
      int pivot_in_hash = 1;

      chomp(in_reader->current_line);
      if (conf.n_keys) {
        /* this could validly return NULL if both sizes are 0 the first time thru,
           when keystr is still NULL, but that shouldn't happen */
        if (realloc_if_needed(&keystr, &keystr_sz,
                              in_reader->current_line_sz) == NULL) {
          /* argv[0] rather than getenv("_"): the latter may be NULL, which
             must not be passed to %s */
          fprintf(stderr, "%s: out of memory.\n", argv[0]);
          break;
        }
      }

      if (conf.n_pivots) {
        if (realloc_if_needed(&pivstr, &pivstr_sz,
                              in_reader->current_line_sz) == NULL) {
          fprintf(stderr, "%s: out of memory.\n", argv[0]);
          break;
        }
      }

      /* make key string from keys[] */
      if (conf.n_keys)
        extract_fields_to_string(in_reader->current_line, keystr, keystr_sz,
                                 conf.keys, conf.n_keys, delim);

      /* make key string from pivots[] */
      extract_fields_to_string(in_reader->current_line, pivstr, pivstr_sz,
                               conf.pivots, conf.n_pivots, delim);

#ifdef CRUSH_DEBUG
      if (conf.n_keys)
        fprintf(stderr, "key string: %s\n", keystr);
      if (conf.n_pivots)
        fprintf(stderr, "pivot string: %s\n", pivstr);
#endif

      /* get hashtable value */
      pivot_hash = (hashtbl_t *) ht_get(&key_hash, keystr);
      if (!pivot_hash) {
        pivot_hash = xmalloc(sizeof(hashtbl_t));
        ht_init(pivot_hash, PIVOT_HASH_SZ, NULL, free);
        pivot_in_hash = 0;
      }

      line_values = ht_get(pivot_hash, pivstr);
      if (!line_values) {
        line_values = xmalloc(sizeof(double) * conf.n_values);
        memset(line_values, 0, sizeof(double) * conf.n_values);
        value_in_hash = 0;
      }


      /* add in values */
      for (i = 0; i < conf.n_values; i++) {
        tmplen =
          get_line_field(fieldbuf, in_reader->current_line, fieldbuf_sz - 1,
                         conf.values[i], delim);
        if (tmplen > 0) {
          line_values[i] += atof(fieldbuf);

          /* remember the greatest input floating-point precision for each
           * field */
          tmplen = float_str_precision(fieldbuf);
          if (conf.value_precisions[i] < tmplen) {
#ifdef CRUSH_DEBUG
            fprintf(stderr, "setting precision to %d for field %d\n", tmplen,
                    i);
#endif
            conf.value_precisions[i] = tmplen;
          }
        }
      }

      /* store hashtable value */
      if (!value_in_hash)
        ht_put(pivot_hash, pivstr, line_values);

      if (!pivot_in_hash) {
        ht_put(&key_hash, keystr, pivot_hash);
      }

      /* store the pivot key string for later use */
      ht_put(&uniq_pivots, pivstr, (void *) 1);
    }

    dbfr_close(in_reader);
    fin = nextfile(argc, argv, &optind, "r");
    if (fin) {
      in_reader = dbfr_init(fin);
      /* reconfigure in case the fields are rearranged in the new file */
      if (configure_pivot(&conf, args, in_reader->next_line, delim) != 0) {
        fprintf(stderr, "%s: error parsing input field arguments.\n", argv[0]);
        return EXIT_HELP;
      }
      /* throw out headers from all files after the first. */
      if (args->keep_header)
        dbfr_getline(in_reader);
    }
  }

  n_key_strings = key_hash.nelems;
  n_pivot_keys = uniq_pivots.nelems;

  /* sort the collection of all pivot key strings */
  pivot_array = xmalloc(sizeof(char *) * n_pivot_keys);
  ht_keys(&uniq_pivots, pivot_array);
  qsort(pivot_array, n_pivot_keys, sizeof(char *),
        (int (*)(const void *, const void *)) key_strcmp);
#ifdef CRUSH_DEBUG
  fprintf(stderr, "sorted pivot strings:\n");
  for (i = 0; i < n_pivot_keys; i++) {
    fprintf(stderr, "\t%s\n", pivot_array[i]);
  }
#endif

  /* OUTPUT SECTION */

  /* print headers separate from data if necessary */
  if (args->keep_header) {
    char *pivot_label;

    /* Worst case for one label: every pivot field fills fieldbuf and is
       followed by the 3-character " - " separator, plus the terminator.
       Sizing it this way removes the old guess that the longest input line
       would be long enough. */
    pivot_label = xmalloc((size_t) conf.n_pivots * (fieldbuf_sz + 3) + 1);

    /* NOTE(review): headers[] holds the labels of the first file only, while
       conf.keys/conf.values reflect the last file configured; indexes are
       not range-checked against n_headers here. */
    if (conf.n_keys) {
      for (i = 0; i < conf.n_keys; i++)
        printf("%s%s", headers[conf.keys[i]], delim);
    }
    for (i = 0; i < n_pivot_keys; i++) {
      pivot_label[0] = 0x00;

      /* get the current pivot field values & build a label with them */
      for (j = 0; j < conf.n_pivots; j++) {
        get_line_field(fieldbuf, pivot_array[i], fieldbuf_sz - 1, j, delim);
        strcat(pivot_label, fieldbuf);
        if (j != conf.n_pivots - 1)
          strcat(pivot_label, " - ");
      }

      /* get the value field labels & print them with the pivot label */
      for (j = 0; j < conf.n_values; j++) {
        printf("%s: %s", pivot_label, headers[conf.values[j]]);
        if (j != conf.n_values - 1)
          fputs(delim, stdout);
      }
      if (i != n_pivot_keys - 1)
        fputs(delim, stdout);

    }
    fputs("\n", stdout);

    free(pivot_label);

    /* free each header string - don't need them anymore */
    for (i = 0; i < n_headers; i++)
      free(headers[i]);
    free(headers);
  }


  {
    char **key_array;
    char *empty_value_string;
    size_t empty_sz, empty_len, delim_sz;
    int width;

    /* construct string for empty value set: n_values zeros (of the
       appropriate precision) with delimiters in between.  The required size
       is measured first so the buffer can neither overflow nor lack room
       for the terminator. */
    delim_sz = strlen(delim);
    empty_sz = 1;
    for (i = 0; i < conf.n_values; i++) {
      width = snprintf(NULL, 0, "%.*f", conf.value_precisions[i], 0.0);
      if (width > 0)
        empty_sz += (size_t) width;
      empty_sz += delim_sz;
    }
    empty_value_string = xmalloc(empty_sz);
    empty_value_string[0] = 0x00;

    /* append at a running offset; printing the buffer into itself with "%s"
       is undefined behaviour */
    empty_len = 0;
    for (i = 0; i < conf.n_values; i++) {
      width = sprintf(empty_value_string + empty_len, "%.*f",
                      conf.value_precisions[i], 0.0);
      if (width > 0)
        empty_len += (size_t) width;
      if (i != conf.n_values - 1) {
        strcpy(empty_value_string + empty_len, delim);
        empty_len += delim_sz;
      }
    }

    key_array = xmalloc(sizeof(char *) * n_key_strings);
    j = ht_keys(&key_hash, key_array);

    /* j now holds the number of distinct keys to be output */
    assert(j == n_key_strings);

    /* sort the keys */
    qsort(key_array, n_key_strings, sizeof(char *),
          (int (*)(const void *, const void *)) key_strcmp);

    /* loop through all key strings */
    for (i = 0; i < n_key_strings; i++) {
      int k;
      pivot_hash = ht_get(&key_hash, key_array[i]);

      /* NOTE(review): this test is always true inside the loop, so a
         delimiter is printed even when no key fields were configured;
         kept as-is to preserve the output format. */
      if (n_key_strings > 0)
        printf("%s%s", key_array[i], delim);

      /* loop through all possible pivot-string inner hashtable keys */
      for (k = 0; k < n_pivot_keys; k++) {
        /* loop through all values */
        line_values = ht_get(pivot_hash, pivot_array[k]);
        if (!line_values)
          fputs(empty_value_string, stdout);
        else {
          for (j = 0; j < conf.n_values; j++) {
            printf("%.*f%s", conf.value_precisions[j], line_values[j],
                   j != conf.n_values - 1 ? delim : "");
          }
        }
        if (k != n_pivot_keys - 1)
          fputs(delim, stdout);
      }
      fputs("\n", stdout);

    }
    free(empty_value_string);
    free(key_array);
  }

  /* CLEANUP SECTION */
  ht_destroy(&key_hash);
  ht_destroy(&uniq_pivots);

  if (keystr && keystr != empty_string)
    free(keystr);
  if (pivstr)
    free(pivstr);
  if (pivot_array)
    free(pivot_array);
  if (fieldbuf)
    free(fieldbuf);

  return EXIT_OKAY;
}
Example #8
0
/** @brief Prints the input with consecutive duplicate lines collapsed, where
  * two lines are duplicates when all of the selected fields are equal.
  *
  * The first line of each run is printed.  When args->count is set, the
  * delimiter and the number of lines in the run are appended to it.  The
  * line terminator of the first input line is reused for all output lines.
  *
  * @param args contains the parsed cmd-line options & arguments.
  * @param argc number of cmd-line arguments.
  * @param argv list of cmd-line arguments
  * @param optind index of the first non-option cmd-line argument.
  *
  * @return exit status for main() to return: EXIT_HELP when no input file can
  *         be opened or the field list cannot be expanded (including when no
  *         field list was given), EXIT_OKAY otherwise.
  */
int funiq(struct cmdargs *args, int argc, char *argv[], int optind) {

  char delim[] = { 0xfe, 0x00 };  /* the delimiter */
  int *fields = NULL;   /* array of field indexes */
  size_t fields_sz = 0; /* the size of the array */
  /* Number of things in the array.  Signed and preset to "error" so that the
   * check below also catches the case where no field option was given. */
  int n_fields = -1;

  FILE *in = NULL;
  dbfr_t *in_reader = NULL;

  char **prev_line;             /* fields from previous line of input */
  char cur_field[FIELD_LEN_LIMIT];

  int i;
  int line_len;                 /* length of the first line as read */

  int dup_count = 1;            /* used with -c option */
  char linebreak[3];

  /* use the default delimiter if necessary */
  if (!args->delim) {
    args->delim = getenv("DELIMITER");
    if (!args->delim)
      args->delim = delim;
  }
  expand_chars(args->delim);

  /* Get the first file, skipping any 0-byte files. */
  if (optind < argc) {
    do {
      in = nextfile(argc, argv, &optind, "r");
      if (in)
        in_reader = dbfr_init(in);
    } while (in && in_reader->eof);
  } else {
    in = stdin;
    in_reader = dbfr_init(in);
  }

  if (!in && !in_reader) {
    fprintf(stderr, "%s: no valid input files\n", argv[0]);
    return EXIT_HELP;
  }

  if (in_reader->eof) {
    /* There were valid input files, but they were all empty. */
    return EXIT_OKAY;
  }

  if (args->fields)
    n_fields = (int) expand_nums(args->fields, &fields, &fields_sz);
  else if (args->field_labels)
    n_fields = (int) expand_label_list(args->field_labels,
                                       in_reader->next_line,
                                       args->delim, &fields, &fields_sz);
  if (n_fields < 0) {
    fprintf(stderr, "%s: error expanding field list\n", argv[0]);
    return EXIT_HELP;
  }

  /* prepare the array of previous field values */
  prev_line = xmalloc(sizeof(char *) * (size_t) n_fields);
  for (i = 0; i < n_fields; i++) {
    prev_line[i] = xmalloc(FIELD_LEN_LIMIT);
    prev_line[i][0] = '\0';
  }

  /* get the first line to seed the prev_line array */
  line_len = dbfr_getline(in_reader);
  if (line_len < 1) {
    /* nothing could be read after all: there is nothing to print */
    for (i = 0; i < n_fields; i++)
      free(prev_line[i]);
    free(prev_line);
    free(fields);
    dbfr_close(in_reader);
    return EXIT_OKAY;
  }

  /* preserve input linebreak style.  assume there can only be 1 or 2 chars
   * in a linebreak sequence.  Only '\r' and '\n' count as linebreak
   * characters; a first line without any falls back to "\n". */
  {
    char last = in_reader->current_line[line_len - 1];
    char before = line_len >= 2 ? in_reader->current_line[line_len - 2] : '\0';
    int last_is_eol = (last == '\r' || last == '\n');
    int before_is_eol = (before == '\r' || before == '\n');

    if (before_is_eol && last_is_eol) {
      linebreak[0] = before;
      linebreak[1] = last;
      linebreak[2] = '\0';
    } else if (last_is_eol) {
      linebreak[0] = last;
      linebreak[1] = '\0';
    } else {
      linebreak[0] = '\n';
      linebreak[1] = '\0';
    }
  }
  chomp(in_reader->current_line);

  for (i = 0; i < n_fields; i++) {
    get_line_field(prev_line[i], in_reader->current_line, FIELD_LEN_LIMIT - 1,
                   fields[i] - 1, args->delim);
  }
  printf("%s", in_reader->current_line); /* first line is never a dup */

  while (in) {
    int matching_fields;

    while (dbfr_getline(in_reader) > 0) {
      chomp(in_reader->current_line);

      matching_fields = 0;

      for (i = 0; i < n_fields; i++) {
        /* extract the field from the input line; start from an empty string
           so a missing field never compares as the previous field's text */
        cur_field[0] = '\0';
        get_line_field(cur_field, in_reader->current_line, FIELD_LEN_LIMIT - 1,
                       fields[i] - 1, args->delim);

        /* see if the field is a duplicate */
        if (str_eq(cur_field, prev_line[i]))
          matching_fields++;

        /* store this line's value */
        strcpy(prev_line[i], cur_field);
      }

      /* if not all of the fields matched, the line
         wasn't a duplicate */
      if (matching_fields != n_fields) {

        if (args->count) {
          /* print the number of dups for
           * the previous output line */
          printf("%s%d%s", args->delim, dup_count, linebreak);
        } else {
          /* give the previous output line a linebreak */
          printf("%s", linebreak);
        }
        printf("%s", in_reader->current_line);
        dup_count = 1;
      } else {
        dup_count++;
      }
    }

    dbfr_close(in_reader);
    in = nextfile(argc, argv, &optind, "r");
    if (in)
      in_reader = dbfr_init(in);
  }

  if (args->count) {
    /* print the number of dups for the last output line */
    printf("%s%d%s", args->delim, dup_count, linebreak);
  } else {
    /* give the last output line a linebreak */
    printf("%s", linebreak);
  }

  for (i = 0; i < n_fields; i++) {
    free(prev_line[i]);
  }
  free(prev_line);
  free(fields);
  return EXIT_OKAY;
}
Example #9
0
/* Parse key fields.
 *
 * Fills conf->aindexes (filter file) and conf->bindexes (stream file) with
 * 0-based key field indexes and sets conf->key_count.  Keys come from, in
 * order of preference: the labels in args->key_labels, the index lists in
 * args->akeys and args->bkeys, or every label that occurs in the header line
 * of both files.  The label-based modes consume the first line of both
 * readers and set args->preserve_header.
 *
 * Returns 0 on success, EXIT_HELP when the two key lists differ in length,
 * or conf->key_count when that is negative.
 * NOTE(review): a key count of exactly 0 is still reported as success.
 */
static int configure_filterkeys(struct fkeys_conf *conf,
                                struct cmdargs *args,
                                dbfr_t *filter_reader,
                                dbfr_t *stream_reader) {
  size_t arrsz=0, brrsz=0;
  int i, j;

  memset(conf, 0x0, sizeof(struct fkeys_conf));

  if (args->key_labels) {
    ssize_t akeyct, bkeyct;

    dbfr_getline(filter_reader);
    dbfr_getline(stream_reader);

    akeyct = expand_label_list(args->key_labels,
                               filter_reader->current_line,
                               delim, &conf->aindexes, &arrsz);

    bkeyct = expand_label_list(args->key_labels,
                               stream_reader->current_line,
                               delim, &conf->bindexes, &brrsz);
    /* preserve header implied */
    args->preserve_header = 1;

    /* Both index arrays are walked with a single count below, so the two
     * expansions have to agree. */
    if (akeyct != bkeyct) {
      fprintf(stderr,
              "key labels must resolve to the same number of fields in the "
              "filter and stream headers.\n");
      return EXIT_HELP;
    }
    conf->key_count = bkeyct;

  } else if (args->akeys && args->bkeys) {
    ssize_t akeyct, bkeyct;

    akeyct = expand_nums(args->akeys, &conf->aindexes, &arrsz);
    bkeyct = expand_nums(args->bkeys, &conf->bindexes, &brrsz);

    if (akeyct != bkeyct) {
      fprintf(stderr, "a-key and b-key lists must be the same length.\n");
      return EXIT_HELP;
    } else {
      conf->key_count = akeyct;
    }

  } else {
    char label_left[MAX_FIELD_LEN + 1], label_right[MAX_FIELD_LEN + 1];
    int nfields_filter, nfields_stream;
    size_t capacity;

    dbfr_getline(filter_reader);
    dbfr_getline(stream_reader);

    nfields_filter = fields_in_line(filter_reader->current_line, delim);
    nfields_stream = fields_in_line(stream_reader->current_line, delim);

    /* Every filter field can contribute one match, even when several filter
     * labels equal the same stream label, so the arrays are sized by the
     * filter's field count rather than by the smaller of the two counts. */
    capacity = nfields_filter > 0 ? (size_t) nfields_filter : 1;
    conf->aindexes = (int *) xmalloc(sizeof(int) * capacity);
    conf->bindexes = (int *) xmalloc(sizeof(int) * capacity);

    /* find the keys common to both files */
    for (i = 0; i < nfields_filter; i++) {
      get_line_field(label_left, filter_reader->current_line,
                     MAX_FIELD_LEN, i, delim);

      for (j = 0; j < nfields_stream; j++) {
        get_line_field(label_right, stream_reader->current_line,
                       MAX_FIELD_LEN, j, delim);

        if (strcmp(label_left, label_right) == 0) {
          /* stored 1-based here; converted to 0-based below */
          conf->aindexes[conf->key_count] = i+1;
          conf->bindexes[conf->key_count] = j+1;
          conf->key_count++;
          break;
        }
      }
    }

    /* preserve header implied */
    args->preserve_header = 1;
  }

  /* convert the 1-based indexes to 0-based */
  for (i = 0; i < conf->key_count; i++) {
    conf->aindexes[i]--;
    conf->bindexes[i]--;
  }

  return (conf->key_count < 1 ? conf->key_count : 0);
}
Example #10
0
/** @brief Copies to the output those input lines whose key fields match a
 * key loaded from the filter file (or, when args->invert is set, those that
 * do not).
 *
 * Output goes to args->outfile when given, otherwise to stdout.  The filter
 * file is args->filter_file, with "-" read from stdin when no such file can
 * be opened.  Lines whose key string is empty are never written.
 *
 * @param args contains the parsed cmd-line options & arguments.
 * @param argc number of cmd-line arguments.
 * @param argv list of cmd-line arguments
 * @param optind index of the first non-option cmd-line argument.
 *
 * @return exit status for main() to return: EXIT_FILE_ERR when the filter
 *         file or the first input cannot be opened or the output file cannot
 *         be closed cleanly, EXIT_HELP when the key configuration fails,
 *         0 otherwise.  Failure to open the output file exits the process.
 */
int filterkeys(struct cmdargs *args, int argc, char *argv[], int optind) {
  FILE *ffile, *outfile;
  dbfr_t *filter_reader, *stream_reader;
  char *t_keybuf;
  int i, acum_len, field_len;

  if (args->outfile) {
    if ((outfile = fopen(args->outfile, "w")) == NULL) {
      perror(args->outfile);
      exit(EXIT_FILE_ERR);
    }
  } else {
    outfile = stdout;
  }

  /* choose field delimiter */
  if (!(delim = (args->delim ? args->delim : getenv("DELIMITER"))))
    delim = default_delim;
  expand_chars(delim);
  delim_len = strlen(delim);

  /* get the filter file */
  int fd = open64(args->filter_file, O_RDONLY);
  if (fd != -1) {
    ffile = fdopen(fd, "r");
    if (!ffile) {
      warn("Opening filter file %s", args->filter_file);
      return EXIT_FILE_ERR;
    }
  } else {
    if (!strcmp(args->filter_file, "-")) {
      ffile = stdin;
    } else {
      warn("Opening filter file %s", args->filter_file);
      return EXIT_FILE_ERR;
    }
  }
  filter_reader = dbfr_init( ffile );

  /* input files */
  if (!(ffile = (optind < argc ? nextfile(argc, argv, &optind, "r") : stdin)))
    return EXIT_FILE_ERR;
  stream_reader = dbfr_init( ffile );


  if (configure_filterkeys(&fk_conf, args, filter_reader, stream_reader) != 0) {
    fprintf(stderr, "%s: error setting up configuration.\n", argv[0]);
    return EXIT_HELP;
  }

  load_filter(&fk_conf, filter_reader);
  dbfr_close( filter_reader );

  if (args->preserve_header) {
    /* if indexes where supplied read the header */
    if (args->akeys && args->bkeys)
      dbfr_getline(stream_reader);
    /* an empty stream has no header line to echo */
    if (stream_reader->current_line)
      fputs(stream_reader->current_line, outfile);
  }

  t_keybuf = (char *) xmalloc(fk_conf.key_buffer_sz);
  while (ffile) {
    while (dbfr_getline(stream_reader) > 0) {

      /* build this line's key string: key fields joined by the delimiter */
      for (acum_len = 0, i = 0;
           i < fk_conf.key_count && fk_conf.key_buffer_sz > acum_len;
           i++) {
        field_len = get_line_field(t_keybuf + acum_len,
                                   stream_reader->current_line,
                                   fk_conf.key_buffer_sz - acum_len,
                                   fk_conf.bindexes[i], delim);
        if (field_len < 0) {
          /* Missing field: treat it as empty instead of letting the negative
           * length pull the write offset backwards. */
          t_keybuf[acum_len] = '\0';
          field_len = 0;
        }
        acum_len += field_len;
        if (i != fk_conf.key_count -1) {
          /* The key buffer is sized from the filter file's lines, so a longer
           * stream line may not fit; stop rather than write past the end. */
          if ((size_t) acum_len + (size_t) delim_len + 1 >
              (size_t) fk_conf.key_buffer_sz)
            break;
          strcat(t_keybuf + acum_len, delim);
          acum_len += delim_len;
        }
      }

      if (acum_len > 0) {
        int found = (ht_get(&fk_conf.filter, t_keybuf) ==
                     (void*) 0xDEADBEEF ? 1 : 0);
        if (found ^ args->invert)
          fputs(stream_reader->current_line, outfile);
      }
    }

    dbfr_close(stream_reader);
    if ((ffile = nextfile(argc, argv, &optind, "r"))) {
      stream_reader = dbfr_init( ffile );
      /* reconfigure fields (needed if labels were used) */
      /* TODO(rgranata): implement reconfigure field
      if (reconfigure_filterkeys(&fk_conf, args, NULL, stream_reader) != 0) {
        fprintf(stderr, "%s: error parsing field arguments.\n", argv[0]);
        return EXIT_HELP;
      }
      */
      if (args->preserve_header)
        dbfr_getline(stream_reader);
    }
  }
  if (t_keybuf)
    free(t_keybuf);

  ht_destroy(&fk_conf.filter);

  /* close the output file so buffered data is flushed and write errors are
   * reported */
  if (outfile != stdout && fclose(outfile) != 0) {
    perror(args->outfile);
    return EXIT_FILE_ERR;
  }

  return 0;
}