/* XSB string substitution entry point
   In: 
       Arg1: string
       Arg2: beginning offset
       Arg3: ending offset. < 0 means end of string
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
int do_regsubstring__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term beg_offset_term, end_offset_term;
  char *input_string=NULL;    /* string where matches are to be found */
  int beg_offset, end_offset, input_len, substring_len;
  int conversion_required=FALSE;
  
  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_SUBSTRING", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[RE_SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: beginning offset */
  beg_offset_term = reg_term(CTXTc 2);
  if (! is_int(beg_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 2 (the beginning offset) must be an integer");
  beg_offset = int_val(beg_offset_term);
  if (beg_offset < 0 || beg_offset > input_len)
    xsb_abort("[RE_SUBSTRING] Arg 2 (=%d) must be between 0 and %d",  
	      beg_offset, input_len);

  /* arg 3: ending offset */
  end_offset_term = reg_term(CTXTc 3);
  if (! is_int(end_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 3 (the ending offset) must be an integer");
  end_offset = int_val(end_offset_term);
  if (end_offset < 0)
    end_offset = input_len;
  else if (end_offset > input_len || end_offset < beg_offset)
    xsb_abort("[RE_SUBSTRING] Arg 3 (=%d) must be < 0 or between %d and %d",
	      end_offset, beg_offset, input_len);

  output_term = reg_term(CTXTc 4);
  if (! is_var(output_term))
    xsb_abort("[RE_SUBSTRING] Arg 4 (the output string) must be an unbound variable");

  /* do the actual replacement */
  substring_len = end_offset-beg_offset;
  XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len);
  XSB_StrNullTerminate(&output_buffer);
  
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4,
			   "RE_SUBSTITUTE", "Arg 4");
  else
    /* DO NOT intern. When atom table garbage collection is in place, then
       replace the instruction with this:
       	   c2p_string(output_buffer, output_term);
       The reason for not interning is that in Web page
       manipulation it is often necessary to process the same string many
       times. This can cause atom table overflow. Not interning allws us to
       circumvent the problem.  */
    ctop_string(CTXTc 4, output_buffer.string);
  
  return(TRUE);
}
Esempio n. 2
0
/* XSB string substitution entry point
   In:
      Arg1: string
      Arg2: beginning offset
      Arg3: ending offset. `_' or -1: end of string, -2: char before last, etc.
   Out:
      Arg4: new (output) string
   Always succeeds, unless error.
*/
xsbBool substring(CTXTdecl)
{
    /* Prolog args are first assigned to these, so we could examine the types
       of these objects to determine if we got strings or atoms. */
    prolog_term input_term, output_term;
    prolog_term beg_offset_term, end_offset_term;
    char *input_string=NULL;    /* string where matches are to be found */
    Integer beg_offset=0, end_offset=0, input_len=0, substring_len=0;
    int conversion_required=FALSE;

    XSB_StrSet(&output_buffer,"");

    input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
    if (isatom(input_term)) /* check it */
        input_string = string_val(input_term);
    else if (islist(input_term)) {
        input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
                                              "SUBSTRING", "input string");
        conversion_required = TRUE;
    } else
        xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

    input_len = strlen(input_string);

    /* arg 2: beginning offset */
    beg_offset_term = reg_term(CTXTc 2);
    if (! (isointeger(beg_offset_term)))
        xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer");
    beg_offset = oint_val(beg_offset_term);
    if (beg_offset < 0)
        beg_offset = 0;
    else if (beg_offset > input_len)
        beg_offset = input_len;

    /* arg 3: ending offset */
    end_offset_term = reg_term(CTXTc 3);
    if (isref(end_offset_term))
        end_offset = input_len;
    else if (! (isointeger(end_offset_term)))
        xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _");
    else end_offset = oint_val(end_offset_term);

    if (end_offset < 0)
        end_offset = input_len + 1 + end_offset;
    else if (end_offset > input_len)
        end_offset = input_len;
    else if (end_offset < beg_offset)
        end_offset = beg_offset;

    output_term = reg_term(CTXTc 4);
    if (! isref(output_term))
        xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable");

    /* do the actual replacement */
    substring_len = end_offset-beg_offset;
    XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, (int)substring_len);
    XSB_StrNullTerminate(&output_buffer);

    /* get result out */
    if (conversion_required)
        c_string_to_p_charlist(CTXTc output_buffer.string, output_term,
                               4, "SUBSTRING", "Arg 4");
    else
        c2p_string(CTXTc output_buffer.string, output_term);

    return(TRUE);
}