int do_regcharlist_to_string__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif prolog_term input_term = reg_term(CTXTc 1); p_charlist_to_c_string(CTXTc input_term, &temp_buffer, "RE_CHARLIST_TO_STRING", "input string"); ctop_string(CTXTc 2, temp_buffer.string); return TRUE; }
/* XSB string substitution entry point In: Arg1: string Arg2: beginning offset Arg3: ending offset. < 0 means end of string Out: Arg4: new (output) string Always succeeds, unless error. */ int do_regsubstring__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term beg_offset_term, end_offset_term; char *input_string=NULL; /* string where matches are to be found */ int beg_offset, end_offset, input_len, substring_len; int conversion_required=FALSE; XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (is_string(input_term)) /* check it */ input_string = string_val(input_term); else if (is_list(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "RE_SUBSTRING", "input string"); conversion_required = TRUE; } else xsb_abort("[RE_SUBSTRING] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: beginning offset */ beg_offset_term = reg_term(CTXTc 2); if (! is_int(beg_offset_term)) xsb_abort("[RE_SUBSTRING] Arg 2 (the beginning offset) must be an integer"); beg_offset = int_val(beg_offset_term); if (beg_offset < 0 || beg_offset > input_len) xsb_abort("[RE_SUBSTRING] Arg 2 (=%d) must be between 0 and %d", beg_offset, input_len); /* arg 3: ending offset */ end_offset_term = reg_term(CTXTc 3); if (! is_int(end_offset_term)) xsb_abort("[RE_SUBSTRING] Arg 3 (the ending offset) must be an integer"); end_offset = int_val(end_offset_term); if (end_offset < 0) end_offset = input_len; else if (end_offset > input_len || end_offset < beg_offset) xsb_abort("[RE_SUBSTRING] Arg 3 (=%d) must be < 0 or between %d and %d", end_offset, beg_offset, input_len); output_term = reg_term(CTXTc 4); if (! is_var(output_term)) xsb_abort("[RE_SUBSTRING] Arg 4 (the output string) must be an unbound variable"); /* do the actual replacement */ substring_len = end_offset-beg_offset; XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len); XSB_StrNullTerminate(&output_buffer); /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "RE_SUBSTITUTE", "Arg 4"); else /* DO NOT intern. When atom table garbage collection is in place, then replace the instruction with this: c2p_string(output_buffer, output_term); The reason for not interning is that in Web page manipulation it is often necessary to process the same string many times. This can cause atom table overflow. Not interning allws us to circumvent the problem. */ ctop_string(CTXTc 4, output_buffer.string); return(TRUE); }
/* XSB string substitution entry point: replace substrings specified in Arg2 with strings in Arg3. In: Arg1: string Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...] Arg3: list of replacement string Out: Arg4: new (output) string Always succeeds, unless error. */ int do_regsubstitute__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1; prolog_term subst_str_term=(prolog_term)0, subst_str_list_term, subst_str_list_term1; char *input_string=NULL; /* string where matches are to be found */ char *subst_string=NULL; prolog_term beg_term, end_term; int beg_offset=0, end_offset=0, input_len; int last_pos = 0; /* last scanned pos in input string */ /* the output buffer is made large enough to include the input string and the substitution string. */ int conversion_required=FALSE; /* from C string to Prolog char list */ XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (is_string(input_term)) /* check it */ input_string = string_val(input_term); else if (is_list(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "RE_SUBSTITUTE", "input string"); conversion_required = TRUE; } else xsb_abort("[RE_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: substring specification */ subst_spec_list_term = reg_term(CTXTc 2); if (!is_list(subst_spec_list_term) && !is_nil(subst_spec_list_term)) xsb_abort("[RE_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]"); /* handle substitution string */ subst_str_list_term = reg_term(CTXTc 3); if (! is_list(subst_str_list_term)) xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings"); output_term = reg_term(CTXTc 4); if (! is_var(output_term)) xsb_abort("[RE_SUBSTITUTE] Arg 4 (the output) must be an unbound variable"); subst_spec_list_term1 = subst_spec_list_term; subst_str_list_term1 = subst_str_list_term; if (is_nil(subst_spec_list_term1)) { XSB_StrSet(&output_buffer, input_string); goto EXIT; } if (is_nil(subst_str_list_term1)) xsb_abort("[RE_SUBSTITUTE] Arg 3 must not be an empty list"); do { subst_reg_term = p2p_car(subst_spec_list_term1); subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1); if (!is_nil(subst_str_list_term1)) { subst_str_term = p2p_car(subst_str_list_term1); subst_str_list_term1 = p2p_cdr(subst_str_list_term1); if (is_string(subst_str_term)) { subst_string = string_val(subst_str_term); } else if (is_list(subst_str_term)) { subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf, "RE_SUBSTITUTE", "substitution string"); } else xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings"); } beg_term = p2p_arg(subst_reg_term,1); end_term = p2p_arg(subst_reg_term,2); if (!is_int(beg_term) || !is_int(end_term)) xsb_abort("[RE_SUBSTITUTE] Non-integer in Arg 2"); else{ beg_offset = int_val(beg_term); end_offset = int_val(end_term); } /* -1 means end of string */ if (end_offset < 0) end_offset = input_len; if ((end_offset < beg_offset) || (beg_offset < last_pos)) xsb_abort("[RE_SUBSTITUTE] Substitution regions in Arg 2 not sorted"); /* do the actual replacement */ XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos); XSB_StrAppend(&output_buffer, subst_string); last_pos = end_offset; } while (!is_nil(subst_spec_list_term1)); XSB_StrAppend(&output_buffer, input_string+end_offset); EXIT: /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "RE_SUBSTITUTE", "Arg 4"); else /* DO NOT intern. When atom table garbage collection is in place, then replace the instruction with this: c2p_string(output_buffer, output_term); The reason for not interning is that in Web page manipulation it is often necessary to process the same string many times. This can cause atom table overflow. Not interning allws us to circumvent the problem. */ ctop_string(CTXTc 4, output_buffer.string); return(TRUE); }
/* XSB regular expression matcher entry point In: Arg1: regexp Arg2: string Arg3: offset Arg4: match_flags: Var means case-sensitive/extended; number: ignorecase/extended List: [{extended|ignorecase},...] Out: Arg5: list of the form [match(bo0,eo0), match(bo1,eo1),...] where bo*,eo* specify the beginning and ending offsets of the matched substrings. All matched substrings are returned. Parenthesized expressions are ignored. */ int do_bulkmatch__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif prolog_term listHead, listTail; /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term regexp_term, input_term, offset_term; prolog_term output_term = p2p_new(CTXT); char *regexp_ptr=NULL; /* regular expression ptr */ char *input_string=NULL; /* string where matches are to be found */ int match_flags=FALSE; int return_code, paren_number, offset; regmatch_t *match_array; int last_pos=0, input_len; if (first_call) initialize_regexp_tbl(); regexp_term = reg_term(CTXTc 1); /* Arg1: regexp */ if (is_string(regexp_term)) /* check it */ regexp_ptr = string_val(regexp_term); else if (is_list(regexp_term)) regexp_ptr = p_charlist_to_c_string(CTXTc regexp_term, ®exp_buffer, "RE_BULKMATCH", "regular expression"); else xsb_abort("[RE_BULKMATCH] Arg 1 (the regular expression) must be an atom or a character list"); input_term = reg_term(CTXTc 2); /* Arg2: string to find matches in */ if (is_string(input_term)) /* check it */ input_string = string_val(input_term); else if (is_list(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "RE_BULKMATCH", "input string"); } else xsb_abort("[RE_BULKMATCH] Arg 2 (the input string) must be an atom or a character list"); input_len = strlen(input_string); offset_term = reg_term(CTXTc 3); /* arg3: offset within the string */ if (! is_int(offset_term)) xsb_abort("[RE_BULKMATCH] Arg 3 (the offset) must be an integer"); offset = int_val(offset_term); if (offset < 0 || offset > input_len) xsb_abort("[RE_BULKMATCH] Arg 3 (=%d) must be between 0 and %d", input_len); /* arg 4 specifies flags: _, number, list [extended,ignorecase] */ match_flags = make_flags(reg_term(CTXTc 4), "RE_BULKMATCH"); last_pos = offset; /* returned result */ listTail = output_term; while (last_pos < input_len) { return_code = xsb_re_match(regexp_ptr, input_string+last_pos, match_flags, &match_array, &paren_number, "RE_BULKMATCH"); /* exit on no match */ if (! return_code) break; c2p_list(CTXTc listTail); /* make it into a list */ listHead = p2p_car(listTail); /* get head of the list */ /* bind i-th match to listHead as match(beg,end) */ c2p_functor(CTXTc "match", 2, listHead); c2p_int(CTXTc match_array[0].rm_so+last_pos, p2p_arg(listHead,1)); c2p_int(CTXTc match_array[0].rm_eo+last_pos, p2p_arg(listHead,2)); listTail = p2p_cdr(listTail); if (match_array[0].rm_eo > 0) last_pos = match_array[0].rm_eo+last_pos; else last_pos++; } c2p_nil(CTXTc listTail); /* bind tail to nil */ return p2p_unify(CTXTc output_term, reg_term(CTXTc 5)); }
/* XSB string substitution entry point: replace substrings specified in Arg2 with strings in Arg3. In: Arg1: string Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...] Arg3: list of replacement strings Out: Arg4: new (output) string Always succeeds, unless error. */ xsbBool string_substitute(CTXTdecl) { /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1; prolog_term subst_str_term=(prolog_term)0, subst_str_list_term, subst_str_list_term1; char *input_string=NULL; /* string where matches are to be found */ char *subst_string=NULL; prolog_term beg_term, end_term; Integer beg_offset=0, end_offset=0, input_len; Integer last_pos = 0; /* last scanned pos in input string */ /* the output buffer is made large enough to include the input string and the substitution string. */ int conversion_required=FALSE; /* from C string to Prolog char list */ XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (isatom(input_term)) /* check it */ input_string = string_val(input_term); else if (islist(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "STRING_SUBSTITUTE", "input string"); conversion_required = TRUE; } else xsb_abort("[STRING_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: substring specification */ subst_spec_list_term = reg_term(CTXTc 2); if (!islist(subst_spec_list_term) && !isnil(subst_spec_list_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]"); /* handle substitution string */ subst_str_list_term = reg_term(CTXTc 3); if (! islist(subst_str_list_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings"); output_term = reg_term(CTXTc 4); if (! isref(output_term)) xsb_abort("[STRING_SUBSTITUTE] Arg 4 (the output) must be an unbound variable"); subst_spec_list_term1 = subst_spec_list_term; subst_str_list_term1 = subst_str_list_term; if (isnil(subst_spec_list_term1)) { XSB_StrSet(&output_buffer, input_string); goto EXIT; } if (isnil(subst_str_list_term1)) xsb_abort("[STRING_SUBSTITUTE] Arg 3 must not be an empty list"); do { subst_reg_term = p2p_car(subst_spec_list_term1); subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1); if (!isnil(subst_str_list_term1)) { subst_str_term = p2p_car(subst_str_list_term1); subst_str_list_term1 = p2p_cdr(subst_str_list_term1); if (isatom(subst_str_term)) { subst_string = string_val(subst_str_term); } else if (islist(subst_str_term)) { subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf, "STRING_SUBSTITUTE", "substitution string"); } else xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings"); } beg_term = p2p_arg(subst_reg_term,1); end_term = p2p_arg(subst_reg_term,2); if (!(isointeger(beg_term)) || !(isointeger(end_term))) xsb_abort("[STRING_SUBSTITUTE] Non-integer in Arg 2"); else { beg_offset = oint_val(beg_term); end_offset = oint_val(end_term); } /* -1 means end of string */ if (end_offset < 0) end_offset = input_len; if ((end_offset < beg_offset) || (beg_offset < last_pos)) xsb_abort("[STRING_SUBSTITUTE] Substitution regions in Arg 2 not sorted"); /* do the actual replacement */ XSB_StrAppendBlk(&output_buffer,input_string+last_pos,(int)(beg_offset-last_pos)); XSB_StrAppend(&output_buffer, subst_string); last_pos = end_offset; } while (!isnil(subst_spec_list_term1)); XSB_StrAppend(&output_buffer, input_string+end_offset); EXIT: /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "STRING_SUBSTITUTE", "Arg 4"); else c2p_string(CTXTc output_buffer.string, output_term); return(TRUE); }
/* XSB string substitution entry point In: Arg1: string Arg2: beginning offset Arg3: ending offset. `_' or -1: end of string, -2: char before last, etc. Out: Arg4: new (output) string Always succeeds, unless error. */ xsbBool substring(CTXTdecl) { /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term beg_offset_term, end_offset_term; char *input_string=NULL; /* string where matches are to be found */ Integer beg_offset=0, end_offset=0, input_len=0, substring_len=0; int conversion_required=FALSE; XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (isatom(input_term)) /* check it */ input_string = string_val(input_term); else if (islist(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "SUBSTRING", "input string"); conversion_required = TRUE; } else xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: beginning offset */ beg_offset_term = reg_term(CTXTc 2); if (! (isointeger(beg_offset_term))) xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer"); beg_offset = oint_val(beg_offset_term); if (beg_offset < 0) beg_offset = 0; else if (beg_offset > input_len) beg_offset = input_len; /* arg 3: ending offset */ end_offset_term = reg_term(CTXTc 3); if (isref(end_offset_term)) end_offset = input_len; else if (! (isointeger(end_offset_term))) xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _"); else end_offset = oint_val(end_offset_term); if (end_offset < 0) end_offset = input_len + 1 + end_offset; else if (end_offset > input_len) end_offset = input_len; else if (end_offset < beg_offset) end_offset = beg_offset; output_term = reg_term(CTXTc 4); if (! isref(output_term)) xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable"); /* do the actual replacement */ substring_len = end_offset-beg_offset; XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, (int)substring_len); XSB_StrNullTerminate(&output_buffer); /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "SUBSTRING", "Arg 4"); else c2p_string(CTXTc output_buffer.string, output_term); return(TRUE); }