int do_regcharlist_to_string__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  prolog_term input_term = reg_term(CTXTc 1);

  p_charlist_to_c_string(CTXTc input_term, &temp_buffer,
			 "RE_CHARLIST_TO_STRING", "input string");
  ctop_string(CTXTc 2, temp_buffer.string);
  return TRUE;
}
/* XSB string substitution entry point
   In: 
       Arg1: string
       Arg2: beginning offset
       Arg3: ending offset. < 0 means end of string
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
int do_regsubstring__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term beg_offset_term, end_offset_term;
  char *input_string=NULL;    /* string where matches are to be found */
  int beg_offset, end_offset, input_len, substring_len;
  int conversion_required=FALSE;
  
  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_SUBSTRING", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[RE_SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: beginning offset */
  beg_offset_term = reg_term(CTXTc 2);
  if (! is_int(beg_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 2 (the beginning offset) must be an integer");
  beg_offset = int_val(beg_offset_term);
  if (beg_offset < 0 || beg_offset > input_len)
    xsb_abort("[RE_SUBSTRING] Arg 2 (=%d) must be between 0 and %d",  
	      beg_offset, input_len);

  /* arg 3: ending offset */
  end_offset_term = reg_term(CTXTc 3);
  if (! is_int(end_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 3 (the ending offset) must be an integer");
  end_offset = int_val(end_offset_term);
  if (end_offset < 0)
    end_offset = input_len;
  else if (end_offset > input_len || end_offset < beg_offset)
    xsb_abort("[RE_SUBSTRING] Arg 3 (=%d) must be < 0 or between %d and %d",
	      end_offset, beg_offset, input_len);

  output_term = reg_term(CTXTc 4);
  if (! is_var(output_term))
    xsb_abort("[RE_SUBSTRING] Arg 4 (the output string) must be an unbound variable");

  /* do the actual replacement */
  substring_len = end_offset-beg_offset;
  XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len);
  XSB_StrNullTerminate(&output_buffer);
  
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4,
			   "RE_SUBSTITUTE", "Arg 4");
  else
    /* DO NOT intern. When atom table garbage collection is in place, then
       replace the instruction with this:
       	   c2p_string(output_buffer, output_term);
       The reason for not interning is that in Web page
       manipulation it is often necessary to process the same string many
       times. This can cause atom table overflow. Not interning allws us to
       circumvent the problem.  */
    ctop_string(CTXTc 4, output_buffer.string);
  
  return(TRUE);
}
/* XSB string substitution entry point: replace substrings specified in Arg2
   with strings in Arg3.
   In: 
       Arg1: string
       Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...]
       Arg3: list of replacement string
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
int do_regsubstitute__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1;
  prolog_term subst_str_term=(prolog_term)0,
    subst_str_list_term, subst_str_list_term1;
  char *input_string=NULL;    /* string where matches are to be found */
  char *subst_string=NULL;
  prolog_term beg_term, end_term;
  int beg_offset=0, end_offset=0, input_len;
  int last_pos = 0; /* last scanned pos in input string */
  /* the output buffer is made large enough to include the input string and the
     substitution string. */
  int conversion_required=FALSE; /* from C string to Prolog char list */
  
  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_SUBSTITUTE", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[RE_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: substring specification */
  subst_spec_list_term = reg_term(CTXTc 2);
  if (!is_list(subst_spec_list_term) && !is_nil(subst_spec_list_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]");

  /* handle substitution string */
  subst_str_list_term = reg_term(CTXTc 3);
  if (! is_list(subst_str_list_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings");

  output_term = reg_term(CTXTc 4);
  if (! is_var(output_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 4 (the output) must be an unbound variable");

  subst_spec_list_term1 = subst_spec_list_term;
  subst_str_list_term1 = subst_str_list_term;

  if (is_nil(subst_spec_list_term1)) {
    XSB_StrSet(&output_buffer, input_string);
    goto EXIT;
  }
  if (is_nil(subst_str_list_term1))
    xsb_abort("[RE_SUBSTITUTE] Arg 3 must not be an empty list");

  do {
    subst_reg_term = p2p_car(subst_spec_list_term1);
    subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1);

    if (!is_nil(subst_str_list_term1)) {
      subst_str_term = p2p_car(subst_str_list_term1);
      subst_str_list_term1 = p2p_cdr(subst_str_list_term1);

      if (is_string(subst_str_term)) {
	subst_string = string_val(subst_str_term);
      } else if (is_list(subst_str_term)) {
	subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf,
					      "RE_SUBSTITUTE",
					      "substitution string");
      } else 
	xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings");
    }

    beg_term = p2p_arg(subst_reg_term,1);
    end_term = p2p_arg(subst_reg_term,2);

    if (!is_int(beg_term) || !is_int(end_term))
      xsb_abort("[RE_SUBSTITUTE] Non-integer in Arg 2");
    else{
      beg_offset = int_val(beg_term);
      end_offset = int_val(end_term);
    }
    /* -1 means end of string */
    if (end_offset < 0)
      end_offset = input_len;
    if ((end_offset < beg_offset) || (beg_offset < last_pos))
      xsb_abort("[RE_SUBSTITUTE] Substitution regions in Arg 2 not sorted");

    /* do the actual replacement */
    XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos);
    XSB_StrAppend(&output_buffer, subst_string);
    
    last_pos = end_offset;

  } while (!is_nil(subst_spec_list_term1));

  XSB_StrAppend(&output_buffer, input_string+end_offset);

 EXIT:
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4,
			   "RE_SUBSTITUTE", "Arg 4");
  else
    /* DO NOT intern. When atom table garbage collection is in place, then
       replace the instruction with this:
       	   c2p_string(output_buffer, output_term);
       The reason for not interning is that in Web page
       manipulation it is often necessary to process the same string many
       times. This can cause atom table overflow. Not interning allws us to
       circumvent the problem.  */
    ctop_string(CTXTc 4, output_buffer.string);
  
  return(TRUE);
}
/* XSB regular expression matcher entry point
   In:
       Arg1: regexp
       Arg2: string
       Arg3: offset
       Arg4: match_flags: Var means case-sensitive/extended;
       	       	          number: ignorecase/extended
			  List: [{extended|ignorecase},...]
   Out:
       Arg5: list of the form [match(bo0,eo0), match(bo1,eo1),...]
       	     where bo*,eo* specify the beginning and ending offsets of the
	     matched substrings.
	     All matched substrings are returned. Parenthesized expressions are
	     ignored.
*/
int do_bulkmatch__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  prolog_term listHead, listTail;
  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term regexp_term, input_term, offset_term;
  prolog_term output_term = p2p_new(CTXT);
  char *regexp_ptr=NULL;      /* regular expression ptr	       	      */
  char *input_string=NULL;    /* string where matches are to be found */
  int match_flags=FALSE;
  int return_code, paren_number, offset;
  regmatch_t *match_array;
  int last_pos=0, input_len;
  
  if (first_call)
    initialize_regexp_tbl();

  regexp_term = reg_term(CTXTc 1);  /* Arg1: regexp */
  if (is_string(regexp_term)) /* check it */
    regexp_ptr = string_val(regexp_term);
  else if (is_list(regexp_term))
    regexp_ptr = p_charlist_to_c_string(CTXTc regexp_term, &regexp_buffer,
					"RE_BULKMATCH", "regular expression");
  else
    xsb_abort("[RE_BULKMATCH] Arg 1 (the regular expression) must be an atom or a character list");

  input_term = reg_term(CTXTc 2);  /* Arg2: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_BULKMATCH", "input string");
  } else
    xsb_abort("[RE_BULKMATCH] Arg 2 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);
  
  offset_term = reg_term(CTXTc 3); /* arg3: offset within the string */
  if (! is_int(offset_term))
    xsb_abort("[RE_BULKMATCH] Arg 3 (the offset) must be an integer");
  offset = int_val(offset_term);
  if (offset < 0 || offset > input_len)
    xsb_abort("[RE_BULKMATCH] Arg 3 (=%d) must be between 0 and %d", input_len);

   /* arg 4 specifies flags: _, number, list [extended,ignorecase] */
  match_flags = make_flags(reg_term(CTXTc 4), "RE_BULKMATCH");

  last_pos = offset;
  /* returned result */
  listTail = output_term;
  while (last_pos < input_len) {
    return_code = xsb_re_match(regexp_ptr, input_string+last_pos, match_flags,
			       &match_array, &paren_number, "RE_BULKMATCH");
    /* exit on no match */
    if (! return_code) break;

    c2p_list(CTXTc listTail); /* make it into a list */
    listHead = p2p_car(listTail); /* get head of the list */

    /* bind i-th match to listHead as match(beg,end) */
    c2p_functor(CTXTc "match", 2, listHead);
    c2p_int(CTXTc match_array[0].rm_so+last_pos, p2p_arg(listHead,1));
    c2p_int(CTXTc match_array[0].rm_eo+last_pos, p2p_arg(listHead,2));

    listTail = p2p_cdr(listTail);
    if (match_array[0].rm_eo > 0)
      last_pos = match_array[0].rm_eo+last_pos;
    else
      last_pos++;
  }

  c2p_nil(CTXTc listTail); /* bind tail to nil */
  return p2p_unify(CTXTc output_term, reg_term(CTXTc 5));
}
Beispiel #5
0
/* XSB string substitution entry point: replace substrings specified in Arg2
   with strings in Arg3.
   In:
       Arg1: string
       Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...]
       Arg3: list of replacement strings
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
xsbBool string_substitute(CTXTdecl)
{
    /* Prolog args are first assigned to these, so we could examine the types
       of these objects to determine if we got strings or atoms. */
    prolog_term input_term, output_term;
    prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1;
    prolog_term subst_str_term=(prolog_term)0,
                subst_str_list_term, subst_str_list_term1;
    char *input_string=NULL;    /* string where matches are to be found */
    char *subst_string=NULL;
    prolog_term beg_term, end_term;
    Integer beg_offset=0, end_offset=0, input_len;
    Integer last_pos = 0; /* last scanned pos in input string */
    /* the output buffer is made large enough to include the input string and the
       substitution string. */
    int conversion_required=FALSE; /* from C string to Prolog char list */

    XSB_StrSet(&output_buffer,"");

    input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
    if (isatom(input_term)) /* check it */
        input_string = string_val(input_term);
    else if (islist(input_term)) {
        input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
                                              "STRING_SUBSTITUTE", "input string");
        conversion_required = TRUE;
    } else
        xsb_abort("[STRING_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list");

    input_len = strlen(input_string);

    /* arg 2: substring specification */
    subst_spec_list_term = reg_term(CTXTc 2);
    if (!islist(subst_spec_list_term) && !isnil(subst_spec_list_term))
        xsb_abort("[STRING_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]");

    /* handle substitution string */
    subst_str_list_term = reg_term(CTXTc 3);
    if (! islist(subst_str_list_term))
        xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");

    output_term = reg_term(CTXTc 4);
    if (! isref(output_term))
        xsb_abort("[STRING_SUBSTITUTE] Arg 4 (the output) must be an unbound variable");

    subst_spec_list_term1 = subst_spec_list_term;
    subst_str_list_term1 = subst_str_list_term;

    if (isnil(subst_spec_list_term1)) {
        XSB_StrSet(&output_buffer, input_string);
        goto EXIT;
    }
    if (isnil(subst_str_list_term1))
        xsb_abort("[STRING_SUBSTITUTE] Arg 3 must not be an empty list");

    do {
        subst_reg_term = p2p_car(subst_spec_list_term1);
        subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1);

        if (!isnil(subst_str_list_term1)) {
            subst_str_term = p2p_car(subst_str_list_term1);
            subst_str_list_term1 = p2p_cdr(subst_str_list_term1);

            if (isatom(subst_str_term)) {
                subst_string = string_val(subst_str_term);
            } else if (islist(subst_str_term)) {
                subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf,
                                                      "STRING_SUBSTITUTE",
                                                      "substitution string");
            } else
                xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");
        }

        beg_term = p2p_arg(subst_reg_term,1);
        end_term = p2p_arg(subst_reg_term,2);

        if (!(isointeger(beg_term)) ||
                !(isointeger(end_term)))
            xsb_abort("[STRING_SUBSTITUTE] Non-integer in Arg 2");
        else {
            beg_offset = oint_val(beg_term);
            end_offset = oint_val(end_term);
        }
        /* -1 means end of string */
        if (end_offset < 0)
            end_offset = input_len;
        if ((end_offset < beg_offset) || (beg_offset < last_pos))
            xsb_abort("[STRING_SUBSTITUTE] Substitution regions in Arg 2 not sorted");

        /* do the actual replacement */
        XSB_StrAppendBlk(&output_buffer,input_string+last_pos,(int)(beg_offset-last_pos));
        XSB_StrAppend(&output_buffer, subst_string);

        last_pos = end_offset;

    } while (!isnil(subst_spec_list_term1));

    XSB_StrAppend(&output_buffer, input_string+end_offset);

EXIT:
    /* get result out */
    if (conversion_required)
        c_string_to_p_charlist(CTXTc output_buffer.string, output_term,
                               4, "STRING_SUBSTITUTE", "Arg 4");
    else
        c2p_string(CTXTc output_buffer.string, output_term);

    return(TRUE);
}
Beispiel #6
0
/* XSB string substitution entry point
   In:
      Arg1: string
      Arg2: beginning offset
      Arg3: ending offset. `_' or -1: end of string, -2: char before last, etc.
   Out:
      Arg4: new (output) string
   Always succeeds, unless error.
*/
xsbBool substring(CTXTdecl)
{
    /* Prolog args are first assigned to these, so we could examine the types
       of these objects to determine if we got strings or atoms. */
    prolog_term input_term, output_term;
    prolog_term beg_offset_term, end_offset_term;
    char *input_string=NULL;    /* string where matches are to be found */
    Integer beg_offset=0, end_offset=0, input_len=0, substring_len=0;
    int conversion_required=FALSE;

    XSB_StrSet(&output_buffer,"");

    input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
    if (isatom(input_term)) /* check it */
        input_string = string_val(input_term);
    else if (islist(input_term)) {
        input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
                                              "SUBSTRING", "input string");
        conversion_required = TRUE;
    } else
        xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

    input_len = strlen(input_string);

    /* arg 2: beginning offset */
    beg_offset_term = reg_term(CTXTc 2);
    if (! (isointeger(beg_offset_term)))
        xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer");
    beg_offset = oint_val(beg_offset_term);
    if (beg_offset < 0)
        beg_offset = 0;
    else if (beg_offset > input_len)
        beg_offset = input_len;

    /* arg 3: ending offset */
    end_offset_term = reg_term(CTXTc 3);
    if (isref(end_offset_term))
        end_offset = input_len;
    else if (! (isointeger(end_offset_term)))
        xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _");
    else end_offset = oint_val(end_offset_term);

    if (end_offset < 0)
        end_offset = input_len + 1 + end_offset;
    else if (end_offset > input_len)
        end_offset = input_len;
    else if (end_offset < beg_offset)
        end_offset = beg_offset;

    output_term = reg_term(CTXTc 4);
    if (! isref(output_term))
        xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable");

    /* do the actual replacement */
    substring_len = end_offset-beg_offset;
    XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, (int)substring_len);
    XSB_StrNullTerminate(&output_buffer);

    /* get result out */
    if (conversion_required)
        c_string_to_p_charlist(CTXTc output_buffer.string, output_term,
                               4, "SUBSTRING", "Arg 4");
    else
        c2p_string(CTXTc output_buffer.string, output_term);

    return(TRUE);
}