Ejemplo n.º 1
0
/* Read an s-expression from a FILE and buffer lines in a linked-list */
char *read_sexp(FILE *in) {
  struct list_t *head = NULL;
  int parens = 0;
  int quote = 0;

  char *buf = malloc(sizeof(*buf) * MAX_LINE);
  if(buf == NULL)
    return NULL;
  buf[0] = '\0';

  /* while there are valid lines and while the parens are not matched */
  char *str;
  while((str = fgets(buf, MAX_LINE, in)) != NULL) {

    quote = strip_comments(buf, quote);
    if(buf[0] == '\n' || buf[0] == '\0') /* skip totally blank lines */
      continue;

    /* break if we've read a matched s-expression */
    if((parens += count_parens(buf, MAX_LINE)) == 0)
      break;

    head = list_push(buf, head);

    buf = malloc(sizeof(*buf) * MAX_LINE);
    if(buf == NULL) {
      list_for_each(head, &list_free_node);
      return NULL;
    }
  }

  if(str == NULL) {
    list_for_each(head, &list_free_node);
    free(buf);
    return NULL;
  }

  quote = strip_comments(buf, quote);
  head = list_push(buf, head);
  head = list_reverse(head);

  size_t len = 0;
  struct list_t *n;
  for(n = head; n != NULL; n = n->next)
    len += strlen((char *) n->data);

  char *concat = malloc(sizeof(*concat) * (len+1));
  char *i = concat;
  for(n = head; n != NULL; n = n->next) {
    len = strlen(n->data);
    strncpy(i, (char*)n->data, len);
    i += len;
  }
  *i = '\0';
  list_for_each(head, &list_free_node);
  return concat;
}
Ejemplo n.º 2
0
/** A convenience function for regular expression searching 

\li There are three common flavors of regular expression: Basic, Extended,
and Perl-compatible (BRE, ERE, PCRE). I use EREs, as per the specs of
your C library, which should match POSIX's ERE specification. 

For example, "p.val" will match "P value", "p.value", "p values" (and even "tempeval", so be
careful).

If you give a non-\c NULL address in which to place a table of paren-delimited substrings, I'll return them as a row in the text element of the returned \ref apop_data set. I'll return <em>all</em> the matches, filling the first row with substrings from the first application of your regex, then filling the next row with another set of matches (if any), and so on to the end of the string. Useful when parsing a list of items, for example.


\param string        The string to search (no default)
\param regex       The regular expression (no default)
\param substrings   Parens in the regex indicate that I should return matching substrings. Give me the _address_ of an \ref apop_data* set, and I will allocate and fill the text portion with matches. Default= \c NULL, meaning do not return substrings (even if parens exist in the regex). If no match, return an empty \ref apop_data set, so <tt>output->textsize[0]==0</tt>.
\param use_case         Should I be case sensitive, \c 'y' or \c 'n'? (default = \c 'n', which is not the POSIX default.)

\return         Count of matches found. 0 == no match. \c substrings may be allocated and filled if needed.
\ingroup names


\li If <tt>apop_opts.stop_on_warning='n'</tt> returns -1 on error (e.g., regex \c NULL or didn't compile).
\li If <tt>strings==NULL</tt>, I return 0---no match---and if \c substrings is provided, set it to \c NULL.

\li Here is the test function. Notice that the substring-pulling
function call passes \c &subs, not plain \c subs. Also, the non-match
has a zero-length blank in <tt>subs->text[0][1]</tt>.

\include test_regex.c

\li Each set of matches will be one row of the output data. E.g., given the regex <tt>([A-Za-z])([0-9])</tt>, the column zero of <tt>outdata</tt> will hold letters, and column one will hold numbers.
Use \ref apop_data_transpose to reverse this so that the letters are in <tt>outdata->text[0]</tt> and numbers in <tt>outdata->text[1]</tt>.
*/
APOP_VAR_HEAD int  apop_regex(const char *string, const char* regex, apop_data **substrings, const char use_case){
    const char * apop_varad_var(string, NULL);
    apop_data **apop_varad_var(substrings, NULL);
    if (!string) {
        if (substrings) *substrings=NULL;
        return 0;
    }
    const char * apop_varad_var(regex, NULL);
    Apop_assert_negone(regex, "You gave me a NULL regex.");
    const char apop_varad_var(use_case, 'n');
APOP_VAR_ENDHEAD
    regex_t re;
    int matchcount=count_parens(regex);
    int found, found_ct=0;
    regmatch_t result[matchcount];
    int compiled_ok = !regcomp(&re, regex, REG_EXTENDED 
                                            + (use_case=='y' ? 0 : REG_ICASE)
                                            + (substrings ? 0 : REG_NOSUB) );
    Apop_stopif(!compiled_ok, return -1, 0, "This regular expression didn't compile: \"%s\"", regex)

    int matchrow = 0;
    if (substrings) *substrings = apop_data_alloc();
    do {
        found_ct+=
        found    = !regexec(&re, string, matchcount+1, result, matchrow ? REG_NOTBOL : 0);
        if (substrings && found){
            *substrings = apop_text_alloc(*substrings, matchrow+1, matchcount);
            //match zero is the whole string; ignore.
            for (int i=0; i< matchcount; i++){
                if (result[i+1].rm_eo > 0){//GNU peculiarity: match-to-empty marked with -1.
                    int length_of_match = result[i+1].rm_eo - result[i+1].rm_so;
                    free((*substrings)->text[matchrow][i]);
                    (*substrings)->text[matchrow][i] = malloc(strlen(string)+1);
                    memcpy((*substrings)->text[matchrow][i], string + result[i+1].rm_so, length_of_match);
                    (*substrings)->text[matchrow][i][length_of_match] = '\0';
                } //else matches nothing; apop_text_alloc already made this cell this NULL.
            }
            string += result[0].rm_eo; //end of whole match;
            matchrow++;
        }
    } while (substrings && found && string[0]!='\0');
    regfree(&re);
    return found_ct;
}