static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name) { /* Return list of words that expand token/name */ cst_val *r; /* printf("token_name %s name %s\n",item_name(token),name); */ if (item_feat_present(token,"phones")) return cons_val(string_val(name),NULL); #if 0 if (item_feat_present(token,"nsw")) nsw = item_feat_string(token,"nsw"); utt = item_utt(token); lex = val_lexicon(feat_val(utt->features,"lexicon")); #endif if (cst_strlen(name) > 0) r = cons_val(string_val(name),0); else r = NULL; return r; }
cst_val *en_exp_real(const char *numstring) { char *aaa, *p; cst_val *r; if (numstring && (numstring[0] == '-')) r = cons_val(string_val("minus"), en_exp_real(&numstring[1])); else if (numstring && (numstring[0] == '+')) r = cons_val(string_val("plus"), en_exp_real(&numstring[1])); else if (((p=strchr(numstring,'e')) != 0) || ((p=strchr(numstring,'E')) != 0)) { aaa = cst_strdup(numstring); aaa[cst_strlen(numstring)-cst_strlen(p)] = '\0'; r = val_append(en_exp_real(aaa), cons_val(string_val("e"), en_exp_real(p+1))); cst_free(aaa); } else if ((p=strchr(numstring,'.')) != 0) { aaa = cst_strdup(numstring); aaa[cst_strlen(numstring)-cst_strlen(p)] = '\0'; r = val_append(en_exp_number(aaa), cons_val(string_val("point"), en_exp_digits(p+1))); cst_free(aaa); } else r = en_exp_number(numstring); /* I don't think you can get here */ return r; }
static inline STORAGE_HANDLE *get_storage_handle(CTXTdeclc Cell name, prolog_term trie_type) { STORAGE_HANDLE *handle_cell; handle_cell = find_or_insert_storage_handle(name); /* new buckets are filled out with 0's by the calloc in hashtable_xsb.c */ if (handle_cell->handle==(Cell)0) { /* initialize new handle */ xsb_dbgmsg((LOG_STORAGE, "GET_STORAGE_HANDLE: New trie created for %s\n", string_val(name))); if (is_int(trie_type)) handle_cell->handle= newtrie(CTXTc (int)p2c_int(trie_type)); else xsb_abort("[GET_STORAGE_HANDLE] trie type (3d arg) must be an integer"); /* Note: not necessary to initialize snapshot_number&changed: handle_cell was calloc()'ed handle_cell->snapshot_number=0; handle_cell->changed=FALSE; */ } else xsb_dbgmsg((LOG_STORAGE, "GET_STORAGE_HANDLE: Using existing trie for %s\n", string_val(name))); return handle_cell; }
/* Input: Arg1: +Substr Arg2: + String Arg3: +forward/reverse (checks only f/r) f means the first match from the start of String r means the first match from the end of String Output: Arg4: Beg Beg is the offset where Substr matches. Must be a variable or an integer Arg5: End End is the offset of the next character after the end of Substr Must be a variable or an integer. Both Beg and End can be negative, in which case they represent the offset from the 2nd character past the end of String. For instance, -1 means the next character past the end of String, so End = -1 means that Substr must be a suffix of String.. The meaning of End and of negative offsets is consistent with substring and string_substitute predicates. */ xsbBool str_match(CTXTdecl) { static char *subptr, *stringptr, *direction, *matchptr; static size_t substr_beg, substr_end; int reverse=TRUE; /* search in reverse */ int beg_bos_offset=TRUE; /* measure beg offset from the beg of string */ int end_bos_offset=TRUE; /* measure end offset from the beg of string */ Integer str_len, sub_len; /* length of string and substring */ Cell beg_offset_term, end_offset_term; term = ptoc_tag(CTXTc 1); term2 = ptoc_tag(CTXTc 2); term3 = ptoc_tag(CTXTc 3); beg_offset_term = ptoc_tag(CTXTc 4); end_offset_term = ptoc_tag(CTXTc 5); if (!isatom(term) || !isatom(term2) || !isatom(term3)) { xsb_abort("STR_MATCH: Arguments 1,2,3 must be bound to strings"); } subptr = string_val(term); stringptr = string_val(term2); direction = string_val(term3); if (*direction == 'f') reverse=FALSE; else if (*direction != 'r') xsb_abort("STR_MATCH: Argument 3 must be bound to forward/reverse"); str_len=strlen(stringptr); sub_len=strlen(subptr); if (isointeger(beg_offset_term)) { if (oint_val(beg_offset_term) < 0) { beg_bos_offset = FALSE; } } if (isointeger(end_offset_term)) { if (oint_val(end_offset_term) < 0) { end_bos_offset = FALSE; } } if (reverse) matchptr = xsb_strrstr(stringptr, subptr); else matchptr = strstr(stringptr, subptr); if (matchptr == NULL) return FALSE; substr_beg = (beg_bos_offset? matchptr - stringptr : -(str_len - (matchptr - stringptr)) ); substr_end = (end_bos_offset? (matchptr - stringptr) + sub_len : -(str_len + 1 - (matchptr - stringptr) - sub_len) ); return (p2p_unify(CTXTc beg_offset_term, makeint(substr_beg)) && p2p_unify(CTXTc end_offset_term, makeint(substr_end))); }
cst_val *en_exp_letters(const char *lets) { /* returns these as list of single char symbols */ char *aaa; cst_val *r; int i; aaa = cst_alloc(char,2); aaa[1] = '\0'; for (r=0,i=0; lets[i] != '\0'; i++) { aaa[0] = lets[i]; if (isupper((int)aaa[0])) aaa[0] = tolower((int)aaa[0]); if (strchr("0123456789",aaa[0])) r = cons_val(string_val(digit2num[aaa[0]-'0']),r); else if (cst_streq(aaa,"a")) r = cons_val(string_val("_a"),r); else r = cons_val(string_val(aaa),r); } cst_free(aaa); return val_reverse(r); }
int prolog_call0(CTXTdeclc Cell term) { Psc psc; if (isconstr(term)) { int disp; char *addr; psc = get_str_psc(term); addr = (char *)(clref_val(term)); for (disp = 1; disp <= (int)get_arity(psc); ++disp) { bld_copy(reg+disp, cell((CPtr)(addr)+disp)); } } else if (isstring(term)) { int value; Pair sym; if (string_val(term) == true_string) return TRUE; /* short-circuit if calling "true" */ sym = insert(string_val(term),0,(Psc)flags[CURRENT_MODULE],&value); psc = pair_psc(sym); } else { if (isnonvar(term)) xsb_type_error(CTXTc "callable",term,"call/1",1); else xsb_instantiation_error(CTXTc "call/1",1); return FALSE; } #ifdef CP_DEBUG pscreg = psc; #endif pcreg = get_ep(psc); if (asynint_val) intercept(CTXTc psc); return TRUE; }
cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos, const cst_features *feats) { int index; int p; const char *q; char *wp; cst_val *phones = 0; int found = FALSE; wp = cst_alloc(char,cst_strlen(word)+2); cst_sprintf(wp,"%c%s",(pos ? pos[0] : '0'),word); if (l->addenda) phones = lex_lookup_addenda(wp,l,&found); if (!found) { index = lex_lookup_bsearch(l,wp); if (index >= 0) { if (l->phone_hufftable) { for (p=index-2; l->data[p]; p--) for (q=l->phone_hufftable[l->data[p]]; *q; q++) phones = cons_val(string_val(l->phone_table[(unsigned char)*q]), phones); } else /* no compression -- should we still support this ? */ { for (p=index-2; l->data[p]; p--) phones = cons_val(string_val(l->phone_table[l->data[p]]), phones); } phones = val_reverse(phones); } else if (l->lts_function) { phones = (l->lts_function)(l,word,"",feats); } else if (l->lts_rule_set) { phones = lts_apply(word, "", /* more features if we had them */ l->lts_rule_set); } } cst_free(wp); return phones; }
static cst_val *add_lts_boundary_marks(const cst_val *l) { cst_val *l1; const cst_val *v; l1 = cons_val(string_val("#"),NULL); for (v=l;v;v=val_cdr(v)) { l1=cons_val(val_car(v),l1); } l1 = cons_val(string_val("#"),l1); l1 = val_reverse(l1); return l1; }
void printTrieSymbol(FILE *fp, Cell symbol) { if ( symbol == ESCAPE_NODE_SYMBOL ) fprintf(fp, "%lu [ESCAPE_NODE_SYMBOL]", ESCAPE_NODE_SYMBOL); else { switch(TrieSymbolType(symbol)) { case XSB_INT: fprintf(fp, IntegerFormatString, int_val(symbol)); break; case XSB_FLOAT: fprintf(fp, "%f", float_val(symbol)); break; case XSB_STRING: fprintf(fp, "%s", string_val(symbol)); break; case XSB_TrieVar: fprintf(fp, "V" IntegerFormatString, DecodeTrieVar(symbol)); break; case XSB_STRUCT: { Psc psc = DecodeTrieFunctor(symbol); fprintf(fp, "%s/%d", get_name(psc), get_arity(psc)); } break; case XSB_LIST: fprintf(fp, "LIST"); break; default: fprintf(fp, "Unknown symbol (tag = %ld)", cell_tag(symbol)); break; } } }
cst_val *en_exp_digits(const char *numstring) { /* Expand given token to list of words pronouncing it as digits */ cst_val *d = 0; const char *p; for (p=numstring; *p; p++) { if ((*p >= '0') && (*p <= '9')) d = cons_val(string_val(digit2num[*p-'0']),d); else d = cons_val(string_val("umpty"),d); } return val_reverse(d); }
cst_val *get_wavelist(const char *wavelistfile) { cst_val *l = 0; cst_tokenstream *ts; const char *token; int i=0; ts = ts_open(wavelistfile); if (!ts) { fprintf(stderr,"combine_waves: can't open \"%s\"\n",wavelistfile); return 0; } while ((token=ts_get(ts)) != 0) { l = cons_val(string_val(token),l); i++; } if (i%2 != 0) { fprintf(stderr,"combine_waves: doesn't have matched pairs \"%s\"\n",wavelistfile); delete_val(l); l = 0; } ts_close(ts); return val_reverse(l); }
static cst_val *lex_lookup_addenda(const char *wp,const cst_lexicon *l, int *found) { /* For those other words */ int i,j; cst_val *phones; phones = NULL; for (i=0; l->addenda[i]; i++) { if (((wp[0] == '0') || (wp[0] == l->addenda[i][0][0]) || (l->addenda[i][0][0] == '0')) && (cst_streq(wp+1,l->addenda[i][0]+1))) { for (j=1; l->addenda[i][j]; j++) phones = cons_val(string_val(l->addenda[i][j]),phones); *found = TRUE; return val_reverse(phones); } } return NULL; }
STORAGE_HANDLE *storage_builtin(CTXTdeclc int builtin_number, Cell name, prolog_term trie_type) { switch (builtin_number) { case GET_STORAGE_HANDLE: return get_storage_handle(CTXTc name, trie_type); case INCREMENT_STORAGE_SNAPSHOT: return increment_storage_snapshot(CTXTc name); case MARK_STORAGE_CHANGED: return mark_storage_changed(CTXTc name); case DESTROY_STORAGE_HANDLE: { xsb_dbgmsg((LOG_STORAGE, "STORAGE_BUILTIN: Destroying storage handle for %s\n", string_val(name))); destroy_storage_handle(name); return NULL; } case SHOW_TABLE_STATE: { show_table_state(); return NULL; } default: xsb_abort("Unknown storage builtin"); return NULL; } }
xsbBucket *search_bucket(Cell name, xsbHashTable *table, enum xsbHashSearchOp search_op) { xsbBucket *bucket, *prev; if (! table->initted) init_hashtable(table); prev = NULL; bucket = get_top_bucket(table,table_hash(name,table->length)); while (bucket && bucket->name) { if (bucket->name == name) { if (search_op == hashtable_delete) { if (!prev) { /* if deleting a top bucket, copy the next bucket into the top one and delete that next bucket. If no next, then just nullify name */ prev = bucket; bucket=bucket->next; if (bucket) { /* use memcpy() because client bucket might have extra fields */ memcpy(prev, bucket, table->bucket_size); free(bucket); } else { mark_bucket_free(prev,table->bucket_size); xsb_dbgmsg((LOG_HASHTABLE, "SEARCH_BUCKET: Destroying storage handle for %s\n", string_val(name))); xsb_dbgmsg((LOG_HASHTABLE, "SEARCH_BUCKET: Bucket nameptr is %p, next bucket %p\n", prev->name, prev->next)); } } else { /* Not top bucket: rearrange pointers & free space */ prev->next = bucket->next; free(bucket); } return NULL; } else return bucket; } prev = bucket; bucket = bucket->next; } /* not found */ if (search_op != hashtable_insert) return NULL; /* else create new bucket */ /* calloc nullifies the allocated space; CLIENTS RELY ON THIS */ if (!bucket) { /* i.e., it is not a top bucket */ bucket = (xsbBucket *)calloc(1,table->bucket_size); if (!bucket) xsb_exit("Out of Memory: Can't allocate hash bucket"); prev->next = bucket; /* NOTE: not necessary to nullify bucket->next because of calloc() */ } bucket->name = name; return bucket; }
xsbBool str_cat(CTXTdecl) { char *str1, *str2, *tmpstr; size_t tmpstr_len; term = ptoc_tag(CTXTc 1); term2 = ptoc_tag(CTXTc 2); if (isatom(term) && isatom(term2)) { str1 = string_val(term); str2 = string_val(term2); tmpstr_len = strlen(str1) + strlen(str2) + 1; tmpstr = (char *)mem_alloc(tmpstr_len,LEAK_SPACE); strcpy(tmpstr, str1); strcat(tmpstr, str2); str1 = string_find(tmpstr, 1); mem_dealloc(tmpstr,tmpstr_len,LEAK_SPACE); return atom_unify(CTXTc makestring(str1), ptoc_tag(CTXTc 3)); } else return FALSE; }
cst_val *cst_args(char **argv, int argc, const char *description, cst_features *args) { /* parses the given arguments wrt the description */ cst_features *op_types = new_features(); cst_val *files = NULL; int i; const char *type; parse_description(description,op_types); for (i=1; i<argc; i++) { if (argv[i][0] == '-') { if ((!feat_present(op_types,argv[i])) || (cst_streq("-h",argv[i])) || (cst_streq("-?",argv[i])) || (cst_streq("--help",argv[i])) || (cst_streq("-help",argv[i]))) parse_usage(argv[0],"","",description); else { type = feat_string(op_types,argv[i]); if (cst_streq("<binary>",type)) feat_set_string(args,argv[i],"true"); else { if (i+1 == argc) parse_usage(argv[0], "missing argument for ",argv[i], description); if (cst_streq("<int>",type)) feat_set_int(args,argv[i],atoi(argv[i+1])); else if (cst_streq("<float>",type)) feat_set_float(args,argv[i],atof(argv[i+1])); else if (cst_streq("<string>",type)) feat_set_string(args,argv[i],argv[i+1]); else parse_usage(argv[0], "unknown arg type ",type, description); i++; } } } else files = cons_val(string_val(argv[i]),files); } delete_features(op_types); return val_reverse(files); }
cst_val* ustring32_lts_apply(const ustring32_t u32,const cst_lts_rewrites *rule) { size_t n=ustring32_length(u32); if(n==0) return NULL; cst_val *l=cons_val(string_val("#"),NULL); uint8_t b[8]; size_t i=n; int k; do { i--; k=u8_uctomb(b,ustring32_at(u32,i),sizeof(b)); b[k]='\0'; l=cons_val(string_val((char*)b),l); } while(i); l=cons_val(string_val("#"),l); cst_val *output=lts_rewrites(l, rule); delete_val(l); return output; }
cst_val *en_exp_id(const char *numstring) { /* Expand numstring as pairs as in years or ids */ char aaa[3]; if ((strlen(numstring) == 4) && (numstring[2] == '0') && (numstring[3] == '0')) { if (numstring[1] == '0') return en_exp_number(numstring); /* 2000, 3000 */ else { aaa[0] = numstring[0]; aaa[1] = numstring[1]; aaa[2] = '\0'; return val_append(en_exp_number(aaa), cons_val(string_val("hundred"),0)); } } else if ((strlen(numstring) == 2) && (numstring[0] == '0')) return cons_val(string_val("oh"), en_exp_digits(&numstring[1])); else if (((strlen(numstring) == 4) && ((numstring[1] == '0'))) || (strlen(numstring) < 3)) return en_exp_number(numstring); else if (strlen(numstring)%2 == 1) { return cons_val(string_val(digit2num[numstring[0]-'0']), en_exp_id(&numstring[1])); } else { aaa[0] = numstring[0]; aaa[1] = numstring[1]; aaa[2] = '\0'; return val_append(en_exp_number(aaa),en_exp_id(&numstring[2])); } }
cst_val *lts_rewrites_word(const char *word, const cst_lts_rewrites *r) { cst_val *w, *p; char x[2]; int i; x[1] = '\0'; w = cons_val(string_val("#"),NULL); for (i=0; word[i]; i++) { x[0] = word[i]; w = cons_val(string_val(x),w); } w = cons_val(string_val("#"),w); w = val_reverse(w); p = lts_rewrites(w,r); delete_val(w); return p; }
Pair link_sym(Psc psc, Psc mod_psc) { Pair *search_ptr, found_pair; char *name; byte arity, global_flag, type; SYS_MUTEX_LOCK_NOERROR( MUTEX_SYMBOL ) ; name = get_name(psc); arity = get_arity(psc); if ( (global_flag = is_globalmod(mod_psc)) ) { search_ptr = (Pair *)symbol_table.table + hash(name, arity, symbol_table.size); } else search_ptr = (Pair *)&get_data(mod_psc); if ((found_pair = search(arity, name, search_ptr))) { if (pair_psc(found_pair) != psc) { /* * Invalidate the old name!! It is no longer accessible * through the global chain. */ type = get_type(pair_psc(found_pair)); if ( type != T_ORDI ) { char message[220], modmsg[200]; if (type == T_DYNA || type == T_PRED) { Psc mod_psc; mod_psc = (Psc) get_data(pair_psc(found_pair)); if (mod_psc == 0) snprintf(modmsg,200,"%s","usermod"); else if (isstring(mod_psc)) snprintf(modmsg,200,"usermod from file: %s",string_val(mod_psc)); else snprintf(modmsg,200,"module: %s",get_name(mod_psc)); snprintf(message,220, "%s/%d (type %d) had been defined in %s", name, arity, type, modmsg); } else snprintf(message,220, "%s/%d (type %d) had been defined in another module!", name, arity, type); xsb_warn(message); } pair_psc(found_pair) = psc; } } else { found_pair = make_psc_pair(psc, search_ptr); if (global_flag) symbol_table_increment_and_check_for_overflow; } SYS_MUTEX_UNLOCK_NOERROR( MUTEX_SYMBOL ) ; return found_pair; } /* link_sym */
cst_val *en_exp_ordinal(const char *rawnumstring) { /* return ordinal for digit string */ cst_val *card, *o; const cst_val *t; const char *l; const char *ord; char *numstring; int i,j; numstring = cst_strdup(rawnumstring); for (j=i=0; i < cst_strlen(rawnumstring); i++) if (rawnumstring[i] != ',') { numstring[j] = rawnumstring[i]; j++; } numstring[j] = '\0'; card = val_reverse(en_exp_number(numstring)); cst_free(numstring); l = val_string(val_car(card)); ord = 0; for (i=0; i<10; i++) if (cst_streq(l,digit2num[i])) ord = ord2num[i]; if (!ord) for (i=0; i<10; i++) if (cst_streq(l,digit2teen[i])) ord = ord2teen[i]; if (!ord) for (i=0; i<10; i++) if (cst_streq(l,digit2enty[i])) ord = ord2enty[i]; if (cst_streq(l,"hundred")) ord = "hundredth"; if (cst_streq(l,"thousand")) ord = "thousandth"; if (cst_streq(l,"billion")) ord = "billtionth"; if (!ord) /* dunno, so don't convert anything */ return card; o = cons_val(string_val(ord),0); for (t=val_cdr(card); t; t=val_cdr(t)) o = cons_val(val_car(t),o); delete_val(card); return o; }
void ItemLocation::ToItemJson(rapidjson::Value *root_ptr, rapidjson_allocator &alloc) { auto &root = *root_ptr; rapidjson::Value string_val(rapidjson::kStringType); root.AddMember("_type", static_cast<int>(type_), alloc); if (type_ == ItemLocationType::STASH) { root.AddMember("_tab", tab_id_, alloc); string_val.SetString(tab_label_.c_str(), alloc); root.AddMember("_tab_label", string_val, alloc); } else { string_val.SetString(character_.c_str(), alloc); root.AddMember("_character", string_val, alloc); } if (socketed_) { root.AddMember("_x", x_, alloc); root.AddMember("_y", y_, alloc); } root.AddMember("_socketed", socketed_, alloc); }
int in_reg2_list(CTXTdeclc Psc psc) { Cell list,term; list = reg[2]; XSB_Deref(list); if (isnil(list)) return TRUE; /* if filter is empty, return all */ while (!isnil(list)) { term = get_list_head(list); XSB_Deref(term); if (isconstr(term)) { if (psc == get_str_psc(term)) return TRUE; } else if (isstring(term)) { if (get_name(psc) == string_val(term)) return TRUE; } list = get_list_tail(list); } return FALSE; }
cst_val *cst_utf8_explode(const cst_string *utf8string) { /* return a list of utf-8 characters as strings */ const unsigned char *xxx = (const unsigned char *)utf8string; cst_val *chars=NULL; int i, l=0; char utf8char[5]; #import "OpenEarsStaticAnalysisToggle.h" #ifdef STATICANALYZEDEPENDENCIES #define __clang_analyzer__ 1 #endif #if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES) #undef __clang_analyzer__ for (i=0; xxx[i]; i++) { if (xxx[i] < 0x80) /* one byte */ { sprintf(utf8char,"%c",xxx[i]); l = 1; } else if (xxx[i] < 0xe0) /* two bytes */ { sprintf(utf8char,"%c%c",xxx[i],xxx[i+1]); i++; l = 2; } else if (xxx[i] < 0xff) /* three bytes */ { sprintf(utf8char,"%c%c%c",xxx[i],xxx[i+1],xxx[i+2]); i++; i++; l = 3; } else { sprintf(utf8char,"%c%c%c%c",xxx[i],xxx[i+1],xxx[i+2],xxx[i+3]); i++; i++; i++; l = 4; } chars = cons_val(string_val(utf8char),chars); } return val_reverse(chars); #endif }
int sprintTrieSymbol(char * buffer, Cell symbol) { int ctr; if ( symbol == ESCAPE_NODE_SYMBOL ) return sprintf(buffer, "%lu [ESCAPE_NODE_SYMBOL]", ESCAPE_NODE_SYMBOL); else { switch(TrieSymbolType(symbol)) { case XSB_INT: return sprintf(buffer, IntegerFormatString, int_val(symbol)); break; case XSB_FLOAT: return sprintf(buffer, "%f", float_val(symbol)); break; case XSB_STRING: return sprintf(buffer, "%s", string_val(symbol)); break; case XSB_TrieVar: return sprintf(buffer, "_V" IntegerFormatString, DecodeTrieVar(symbol)); break; case XSB_STRUCT: { Psc psc; if (isboxedfloat(symbol)) { return sprintf(buffer, "%lf", boxedfloat_val(symbol)); break; } psc = DecodeTrieFunctor(symbol); ctr = sprint_quotedname(buffer, 0, get_name(psc)); return sprintf(buffer+ctr, "/%d", get_arity(psc)); } break; case XSB_LIST: return sprintf(buffer, "LIST"); break; default: return sprintf(buffer, "Unknown symbol (tag = %" Intfmt")", cell_tag(symbol)); break; } } }
cst_val *cmu_grapheme_lex_lts_function(const struct lexicon_struct *l, const char *word, const char *pos, const cst_features *feats) { cst_val *phones = 0; cst_val *utflets = 0; const cst_val *v; char ord[10]; int i,phindex; /* string to utf8 chars */ utflets = cst_utf8_explode(word); for (v=utflets; v; v=val_cdr(v)) { /* We will add the found phones in reverse order and reverse then */ /* afterwards */ cst_utf8_as_hex(val_string(val_car(v)),ord); phindex = cst_find_u2sampa(ord); if (phindex < 0) printf("awb_debug no sampa %s %s\n",val_string(val_car(v)),ord); for (i=4; (phindex>=0) && (i>0); i--) { if (unicode_sampa_mapping[phindex][i]) phones = cons_val(string_val(unicode_sampa_mapping[phindex][i]), phones); } } phones = val_reverse(phones); #if 1 printf("cmu_grapheme_lex.c: word \"%s\" ",word); val_print(stdout,phones); printf("\n"); #endif delete_val(utflets); return phones; }
static int make_flags(prolog_term flag_term, char *context) { int flags = 0; prolog_term aux_list=flag_term, head_trm; char *head; #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif if (is_var(flag_term)) return REG_EXTENDED; else if (is_int(flag_term)) return (REG_EXTENDED | REG_ICASE); if (is_nil(flag_term)) return 0; /* basic, case-sensitive */ if (! is_list(flag_term)) xsb_abort("[%s] Arg 4 (flags) must be a variable, an integer, or a list", context); do { head_trm = p2p_car(aux_list); aux_list = p2p_cdr(aux_list); if (!is_string(head_trm)) xsb_abort("[%s] Arg 4: allowed flags are `extended' and `ignorecase'", context); head = string_val(head_trm); if (strcmp(head,"extended")==0) flags = flags | REG_EXTENDED; else if (strcmp(head,"ignorecase")==0) flags = flags | REG_ICASE; } while (!is_nil(aux_list)); return flags; }
cst_val *cst_utf8_explode(const cst_string *utf8string) { /* return a list of utf-8 characters as strings */ const unsigned char *xxx = (const unsigned char *)utf8string; cst_val *chars=NULL; int i, l=0; char utf8char[5]; for (i=0; xxx[i]; i++) { if (xxx[i] < 0x80) /* one byte */ { sprintf(utf8char,"%c",xxx[i]); l = 1; } else if (xxx[i] < 0xe0) /* two bytes */ { sprintf(utf8char,"%c%c",xxx[i],xxx[i+1]); i++; l = 2; } else if (xxx[i] < 0xff) /* three bytes */ { sprintf(utf8char,"%c%c%c",xxx[i],xxx[i+1],xxx[i+2]); i++; i++; l = 3; } else { sprintf(utf8char,"%c%c%c%c",xxx[i],xxx[i+1],xxx[i+2],xxx[i+3]); i++; i++; i++; l = 4; } chars = cons_val(string_val(utf8char),chars); } return val_reverse(chars); }
/* XSB string substitution entry point In: Arg1: string Arg2: beginning offset Arg3: ending offset. < 0 means end of string Out: Arg4: new (output) string Always succeeds, unless error. */ int do_regsubstring__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term beg_offset_term, end_offset_term; char *input_string=NULL; /* string where matches are to be found */ int beg_offset, end_offset, input_len, substring_len; int conversion_required=FALSE; XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (is_string(input_term)) /* check it */ input_string = string_val(input_term); else if (is_list(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "RE_SUBSTRING", "input string"); conversion_required = TRUE; } else xsb_abort("[RE_SUBSTRING] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: beginning offset */ beg_offset_term = reg_term(CTXTc 2); if (! is_int(beg_offset_term)) xsb_abort("[RE_SUBSTRING] Arg 2 (the beginning offset) must be an integer"); beg_offset = int_val(beg_offset_term); if (beg_offset < 0 || beg_offset > input_len) xsb_abort("[RE_SUBSTRING] Arg 2 (=%d) must be between 0 and %d", beg_offset, input_len); /* arg 3: ending offset */ end_offset_term = reg_term(CTXTc 3); if (! is_int(end_offset_term)) xsb_abort("[RE_SUBSTRING] Arg 3 (the ending offset) must be an integer"); end_offset = int_val(end_offset_term); if (end_offset < 0) end_offset = input_len; else if (end_offset > input_len || end_offset < beg_offset) xsb_abort("[RE_SUBSTRING] Arg 3 (=%d) must be < 0 or between %d and %d", end_offset, beg_offset, input_len); output_term = reg_term(CTXTc 4); if (! is_var(output_term)) xsb_abort("[RE_SUBSTRING] Arg 4 (the output string) must be an unbound variable"); /* do the actual replacement */ substring_len = end_offset-beg_offset; XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len); XSB_StrNullTerminate(&output_buffer); /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "RE_SUBSTITUTE", "Arg 4"); else /* DO NOT intern. When atom table garbage collection is in place, then replace the instruction with this: c2p_string(output_buffer, output_term); The reason for not interning is that in Web page manipulation it is often necessary to process the same string many times. This can cause atom table overflow. Not interning allws us to circumvent the problem. */ ctop_string(CTXTc 4, output_buffer.string); return(TRUE); }
/* XSB string substitution entry point: replace substrings specified in Arg2 with strings in Arg3. In: Arg1: string Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...] Arg3: list of replacement string Out: Arg4: new (output) string Always succeeds, unless error. */ int do_regsubstitute__(void) { #ifdef MULTI_THREAD if( NULL == th) th = xsb_get_main_thread(); #endif /* Prolog args are first assigned to these, so we could examine the types of these objects to determine if we got strings or atoms. */ prolog_term input_term, output_term; prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1; prolog_term subst_str_term=(prolog_term)0, subst_str_list_term, subst_str_list_term1; char *input_string=NULL; /* string where matches are to be found */ char *subst_string=NULL; prolog_term beg_term, end_term; int beg_offset=0, end_offset=0, input_len; int last_pos = 0; /* last scanned pos in input string */ /* the output buffer is made large enough to include the input string and the substitution string. */ int conversion_required=FALSE; /* from C string to Prolog char list */ XSB_StrSet(&output_buffer,""); input_term = reg_term(CTXTc 1); /* Arg1: string to find matches in */ if (is_string(input_term)) /* check it */ input_string = string_val(input_term); else if (is_list(input_term)) { input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer, "RE_SUBSTITUTE", "input string"); conversion_required = TRUE; } else xsb_abort("[RE_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list"); input_len = strlen(input_string); /* arg 2: substring specification */ subst_spec_list_term = reg_term(CTXTc 2); if (!is_list(subst_spec_list_term) && !is_nil(subst_spec_list_term)) xsb_abort("[RE_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]"); /* handle substitution string */ subst_str_list_term = reg_term(CTXTc 3); if (! is_list(subst_str_list_term)) xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings"); output_term = reg_term(CTXTc 4); if (! is_var(output_term)) xsb_abort("[RE_SUBSTITUTE] Arg 4 (the output) must be an unbound variable"); subst_spec_list_term1 = subst_spec_list_term; subst_str_list_term1 = subst_str_list_term; if (is_nil(subst_spec_list_term1)) { XSB_StrSet(&output_buffer, input_string); goto EXIT; } if (is_nil(subst_str_list_term1)) xsb_abort("[RE_SUBSTITUTE] Arg 3 must not be an empty list"); do { subst_reg_term = p2p_car(subst_spec_list_term1); subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1); if (!is_nil(subst_str_list_term1)) { subst_str_term = p2p_car(subst_str_list_term1); subst_str_list_term1 = p2p_cdr(subst_str_list_term1); if (is_string(subst_str_term)) { subst_string = string_val(subst_str_term); } else if (is_list(subst_str_term)) { subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf, "RE_SUBSTITUTE", "substitution string"); } else xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings"); } beg_term = p2p_arg(subst_reg_term,1); end_term = p2p_arg(subst_reg_term,2); if (!is_int(beg_term) || !is_int(end_term)) xsb_abort("[RE_SUBSTITUTE] Non-integer in Arg 2"); else{ beg_offset = int_val(beg_term); end_offset = int_val(end_term); } /* -1 means end of string */ if (end_offset < 0) end_offset = input_len; if ((end_offset < beg_offset) || (beg_offset < last_pos)) xsb_abort("[RE_SUBSTITUTE] Substitution regions in Arg 2 not sorted"); /* do the actual replacement */ XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos); XSB_StrAppend(&output_buffer, subst_string); last_pos = end_offset; } while (!is_nil(subst_spec_list_term1)); XSB_StrAppend(&output_buffer, input_string+end_offset); EXIT: /* get result out */ if (conversion_required) c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4, "RE_SUBSTITUTE", "Arg 4"); else /* DO NOT intern. When atom table garbage collection is in place, then replace the instruction with this: c2p_string(output_buffer, output_term); The reason for not interning is that in Web page manipulation it is often necessary to process the same string many times. This can cause atom table overflow. Not interning allws us to circumvent the problem. */ ctop_string(CTXTc 4, output_buffer.string); return(TRUE); }