static cst_val *cmu_LANGNAME_tokentowords(cst_item *token, const char *name)
{
    /* Return list of words that expand token/name */
    cst_val *r;

    /* printf("token_name %s name %s\n",item_name(token),name); */

    if (item_feat_present(token,"phones"))
	return cons_val(string_val(name),NULL);

#if 0
    if (item_feat_present(token,"nsw"))
	nsw = item_feat_string(token,"nsw");

    utt = item_utt(token);
    lex = val_lexicon(feat_val(utt->features,"lexicon"));
#endif

    if (cst_strlen(name) > 0)
        r = cons_val(string_val(name),0);
    else
        r = NULL;
    
    return r;
}
Exemple #2
0
cst_val *en_exp_real(const char *numstring)
{
    char *aaa, *p;
    cst_val *r;

    if (numstring && (numstring[0] == '-'))
	r = cons_val(string_val("minus"),
		     en_exp_real(&numstring[1]));
    else if (numstring && (numstring[0] == '+'))
	r = cons_val(string_val("plus"),
		     en_exp_real(&numstring[1]));
    else if (((p=strchr(numstring,'e')) != 0) ||
	     ((p=strchr(numstring,'E')) != 0))
    {
	aaa = cst_strdup(numstring);
	aaa[cst_strlen(numstring)-cst_strlen(p)] = '\0';
	r = val_append(en_exp_real(aaa),
		       cons_val(string_val("e"),
				en_exp_real(p+1)));
	cst_free(aaa);
    }
    else if ((p=strchr(numstring,'.')) != 0)
    {
	aaa = cst_strdup(numstring);
	aaa[cst_strlen(numstring)-cst_strlen(p)] = '\0';
	r = val_append(en_exp_number(aaa),
		       cons_val(string_val("point"),
				en_exp_digits(p+1)));
	cst_free(aaa);
    }
    else
	r = en_exp_number(numstring);  /* I don't think you can get here */

    return r;
}
Exemple #3
0
static inline STORAGE_HANDLE *get_storage_handle(CTXTdeclc Cell name, prolog_term trie_type)
{
    STORAGE_HANDLE *handle_cell;

    handle_cell = find_or_insert_storage_handle(name);
    /* new buckets are filled out with 0's by the calloc in hashtable_xsb.c */
    if (handle_cell->handle==(Cell)0) {
        /* initialize new handle */
        xsb_dbgmsg((LOG_STORAGE,
                    "GET_STORAGE_HANDLE: New trie created for %s\n",
                    string_val(name)));
        if (is_int(trie_type))
            handle_cell->handle= newtrie(CTXTc (int)p2c_int(trie_type));
        else
            xsb_abort("[GET_STORAGE_HANDLE] trie type (3d arg) must be an integer");

        /* Note: not necessary to initialize snapshot_number&changed: handle_cell
           was calloc()'ed
           handle_cell->snapshot_number=0;
           handle_cell->changed=FALSE;
        */
    }
    else
        xsb_dbgmsg((LOG_STORAGE,
                    "GET_STORAGE_HANDLE: Using existing trie for %s\n",
                    string_val(name)));
    return handle_cell;
}
Exemple #4
0
/*
  Input:
      Arg1: +Substr
      Arg2: + String
      Arg3: +forward/reverse (checks only f/r)
        f means the first match from the start of String
	r means the first match from the end of String
  Output:
      Arg4: Beg
        Beg is the offset where Substr matches. Must be a variable or an
	integer
      Arg5: End
	End is the offset of the next character after the end of Substr
	Must be a variable or an integer.

      Both Beg and End can be negative, in which case they represent the offset
      from the 2nd character past the end of String.
      For instance, -1 means the next character past the end of String,
      so End = -1 means that Substr must be a suffix of String..

      The meaning of End and of negative offsets is consistent with substring
      and string_substitute predicates.
*/
xsbBool str_match(CTXTdecl)
{
    static char *subptr, *stringptr, *direction, *matchptr;
    static size_t substr_beg, substr_end;
    int reverse=TRUE; /* search in reverse */
    int beg_bos_offset=TRUE; /* measure beg offset from the beg of string */
    int end_bos_offset=TRUE; /* measure end offset from the beg of string */
    Integer str_len, sub_len; /* length of string and substring */
    Cell beg_offset_term, end_offset_term;

    term = ptoc_tag(CTXTc 1);
    term2 = ptoc_tag(CTXTc 2);
    term3 = ptoc_tag(CTXTc 3);
    beg_offset_term = ptoc_tag(CTXTc 4);
    end_offset_term = ptoc_tag(CTXTc 5);
    if (!isatom(term) || !isatom(term2) || !isatom(term3)) {
        xsb_abort("STR_MATCH: Arguments 1,2,3 must be bound to strings");
    }
    subptr = string_val(term);
    stringptr = string_val(term2);
    direction = string_val(term3);

    if (*direction == 'f')
        reverse=FALSE;
    else if (*direction != 'r')
        xsb_abort("STR_MATCH: Argument 3 must be bound to forward/reverse");

    str_len=strlen(stringptr);
    sub_len=strlen(subptr);

    if (isointeger(beg_offset_term)) {
        if (oint_val(beg_offset_term) < 0) {
            beg_bos_offset = FALSE;
        }
    }
    if (isointeger(end_offset_term)) {
        if (oint_val(end_offset_term) < 0) {
            end_bos_offset = FALSE;
        }
    }

    if (reverse)
        matchptr = xsb_strrstr(stringptr, subptr);
    else
        matchptr = strstr(stringptr, subptr);

    if (matchptr == NULL) return FALSE;

    substr_beg = (beg_bos_offset?
                  matchptr - stringptr : -(str_len - (matchptr - stringptr))
                 );
    substr_end = (end_bos_offset?
                  (matchptr - stringptr) + sub_len
                  : -(str_len + 1 - (matchptr - stringptr) - sub_len)
                 );

    return
        (p2p_unify(CTXTc beg_offset_term, makeint(substr_beg))
         && p2p_unify(CTXTc end_offset_term, makeint(substr_end)));
}
Exemple #5
0
cst_val *en_exp_letters(const char *lets)
{
    /* returns these as list of single char symbols */
    char *aaa;
    cst_val *r;
    int i;

    aaa = cst_alloc(char,2);
    aaa[1] = '\0';
    for (r=0,i=0; lets[i] != '\0'; i++)
    {
	aaa[0] = lets[i];
	if (isupper((int)aaa[0])) 
	    aaa[0] = tolower((int)aaa[0]);
	if (strchr("0123456789",aaa[0]))
	    r = cons_val(string_val(digit2num[aaa[0]-'0']),r);
	else if (cst_streq(aaa,"a"))
	    r = cons_val(string_val("_a"),r);
	else
	    r = cons_val(string_val(aaa),r);
    }
    cst_free(aaa);

    return val_reverse(r);
}
int prolog_call0(CTXTdeclc Cell term)
{
    Psc  psc;
    if (isconstr(term)) {
      int  disp;
      char *addr;
      psc = get_str_psc(term);
      addr = (char *)(clref_val(term));
      for (disp = 1; disp <= (int)get_arity(psc); ++disp) {
	bld_copy(reg+disp, cell((CPtr)(addr)+disp));
      }
    } else if (isstring(term)) {
      int  value;
      Pair sym;
      if (string_val(term) == true_string) return TRUE; /* short-circuit if calling "true" */
      sym = insert(string_val(term),0,(Psc)flags[CURRENT_MODULE],&value);
      psc = pair_psc(sym);
    } else {
      if (isnonvar(term))
	xsb_type_error(CTXTc "callable",term,"call/1",1);
      else xsb_instantiation_error(CTXTc "call/1",1);
      return FALSE;
    }
#ifdef CP_DEBUG
    pscreg = psc;
#endif
    pcreg = get_ep(psc);
    if (asynint_val) intercept(CTXTc psc);
    return TRUE;
}
Exemple #7
0
cst_val *lex_lookup(const cst_lexicon *l, const char *word, const char *pos,
                    const cst_features *feats)
{
    int index;
    int p;
    const char *q;
    char *wp;
    cst_val *phones = 0;
    int found = FALSE;

    wp = cst_alloc(char,cst_strlen(word)+2);
    cst_sprintf(wp,"%c%s",(pos ? pos[0] : '0'),word);

    if (l->addenda)
        phones = lex_lookup_addenda(wp,l,&found);

    if (!found)
    {
        index = lex_lookup_bsearch(l,wp);

        if (index >= 0)
        {
            if (l->phone_hufftable)
            {
                for (p=index-2; l->data[p]; p--)
                    for (q=l->phone_hufftable[l->data[p]]; *q; q++)
                        phones = cons_val(string_val(l->phone_table[(unsigned char)*q]),
                                          phones);
            }
            else  /* no compression -- should we still support this ? */
            {
                for (p=index-2; l->data[p]; p--)
                    phones = cons_val(string_val(l->phone_table[l->data[p]]),
                                      phones);
            }
            phones = val_reverse(phones);
        }
        else if (l->lts_function)
        {
            phones = (l->lts_function)(l,word,"",feats);
        }
        else if (l->lts_rule_set)
        {
            phones = lts_apply(word,
                               "",  /* more features if we had them */
                               l->lts_rule_set);
        }
    }

    cst_free(wp);

    return phones;
}
Exemple #8
0
static cst_val *add_lts_boundary_marks(const cst_val *l)
{
    cst_val *l1;
    const cst_val *v;
    l1 = cons_val(string_val("#"),NULL);
    for (v=l;v;v=val_cdr(v))
      {
        l1=cons_val(val_car(v),l1);
      }
    l1 = cons_val(string_val("#"),l1);
    l1 = val_reverse(l1);
    return l1;
}
Exemple #9
0
void printTrieSymbol(FILE *fp, Cell symbol) {

  if ( symbol == ESCAPE_NODE_SYMBOL )
    fprintf(fp, "%lu [ESCAPE_NODE_SYMBOL]", ESCAPE_NODE_SYMBOL);
  else {
    switch(TrieSymbolType(symbol)) {
    case XSB_INT:
      fprintf(fp, IntegerFormatString, int_val(symbol));
      break;
    case XSB_FLOAT:
      fprintf(fp, "%f", float_val(symbol));
      break;
    case XSB_STRING:
      fprintf(fp, "%s", string_val(symbol));
      break;
    case XSB_TrieVar:
      fprintf(fp, "V" IntegerFormatString, DecodeTrieVar(symbol));
      break;
    case XSB_STRUCT:
      {
	Psc psc = DecodeTrieFunctor(symbol);
	fprintf(fp, "%s/%d", get_name(psc), get_arity(psc));
      }
      break;
    case XSB_LIST:
      fprintf(fp, "LIST");
      break;
    default:
      fprintf(fp, "Unknown symbol (tag = %ld)", cell_tag(symbol));
      break;
    }
  }
}
Exemple #10
0
cst_val *en_exp_digits(const char *numstring)
{
    /* Expand given token to list of words pronouncing it as digits */
    cst_val *d = 0;
    const char *p;

    for (p=numstring; *p; p++)
    {
	if ((*p >= '0') && (*p <= '9'))
	    d = cons_val(string_val(digit2num[*p-'0']),d);
	else
	    d = cons_val(string_val("umpty"),d);
    }

    return val_reverse(d);
}
Exemple #11
0
cst_val *get_wavelist(const char *wavelistfile)
{
    cst_val *l = 0;
    cst_tokenstream *ts;
    const char *token;
    int i=0;

    ts = ts_open(wavelistfile);
    if (!ts)
    {
	fprintf(stderr,"combine_waves: can't open \"%s\"\n",wavelistfile);
	return 0;
    }

    while ((token=ts_get(ts)) != 0)
    {
	l = cons_val(string_val(token),l);
	i++;
    }

    if (i%2 != 0)
    {
	fprintf(stderr,"combine_waves: doesn't have matched pairs \"%s\"\n",wavelistfile);
	delete_val(l);
	l = 0;
    }

    ts_close(ts);

    return val_reverse(l);
}
Exemple #12
0
static cst_val *lex_lookup_addenda(const char *wp,const cst_lexicon *l,
                                   int *found)
{
    /* For those other words */
    int i,j;
    cst_val *phones;

    phones = NULL;

    for (i=0; l->addenda[i]; i++)
    {
        if (((wp[0] == '0') ||
                (wp[0] == l->addenda[i][0][0]) ||
                (l->addenda[i][0][0] == '0')) &&
                (cst_streq(wp+1,l->addenda[i][0]+1)))
        {
            for (j=1; l->addenda[i][j]; j++)
                phones = cons_val(string_val(l->addenda[i][j]),phones);
            *found = TRUE;
            return val_reverse(phones);
        }
    }

    return NULL;
}
Exemple #13
0
STORAGE_HANDLE *storage_builtin(CTXTdeclc int builtin_number, Cell name, prolog_term trie_type)
{
    switch (builtin_number) {
    case GET_STORAGE_HANDLE:
        return get_storage_handle(CTXTc name, trie_type);
    case INCREMENT_STORAGE_SNAPSHOT:
        return increment_storage_snapshot(CTXTc name);
    case MARK_STORAGE_CHANGED:
        return mark_storage_changed(CTXTc name);
    case DESTROY_STORAGE_HANDLE: {
        xsb_dbgmsg((LOG_STORAGE,
                    "STORAGE_BUILTIN: Destroying storage handle for %s\n",
                    string_val(name)));
        destroy_storage_handle(name);
        return NULL;
    }
    case SHOW_TABLE_STATE: {
        show_table_state();
        return NULL;
    }
    default:
        xsb_abort("Unknown storage builtin");
        return NULL;
    }
}
Exemple #14
0
xsbBucket *search_bucket(Cell name,
			 xsbHashTable *table,
			 enum xsbHashSearchOp search_op)
{
  xsbBucket *bucket, *prev;

  if (! table->initted) init_hashtable(table);

  prev = NULL;
  bucket = get_top_bucket(table,table_hash(name,table->length));
  while (bucket && bucket->name) {
    if (bucket->name == name) {
      if (search_op == hashtable_delete) {
	if (!prev) {
	  /* if deleting a top bucket, copy the next bucket into the top one
	     and delete that next bucket. If no next, then just nullify name */
	  prev = bucket;
	  bucket=bucket->next;
	  if (bucket) {
	    /* use memcpy() because client bucket might have extra fields */
	    memcpy(prev, bucket, table->bucket_size);
	    free(bucket);
	  } else {
	    mark_bucket_free(prev,table->bucket_size);
	    xsb_dbgmsg((LOG_HASHTABLE,
		       "SEARCH_BUCKET: Destroying storage handle for %s\n",
		       string_val(name)));
	    xsb_dbgmsg((LOG_HASHTABLE, 
		       "SEARCH_BUCKET: Bucket nameptr is %p, next bucket %p\n",
		       prev->name, prev->next));
	  }
	} else {
	  /* Not top bucket: rearrange pointers & free space */
	  prev->next = bucket->next;
	  free(bucket);
	}
	return NULL;
      } else
	return bucket;
    }
    prev = bucket;
    bucket = bucket->next;
  }
  /* not found */
  if (search_op != hashtable_insert) return NULL;
  /* else create new bucket */
  /* calloc nullifies the allocated space; CLIENTS RELY ON THIS */
  if (!bucket) { /* i.e., it is not a top bucket */
    bucket = (xsbBucket *)calloc(1,table->bucket_size);
    if (!bucket)
      xsb_exit("Out of Memory: Can't allocate hash bucket");
    prev->next = bucket;
    /* NOTE: not necessary to nullify bucket->next because of calloc() */
  }
  bucket->name = name;
  return bucket;
}
Exemple #15
0
xsbBool str_cat(CTXTdecl)
{
    char *str1, *str2, *tmpstr;
    size_t tmpstr_len;

    term = ptoc_tag(CTXTc 1);
    term2 = ptoc_tag(CTXTc 2);
    if (isatom(term) && isatom(term2)) {
        str1 = string_val(term);
        str2 = string_val(term2);
        tmpstr_len = strlen(str1) + strlen(str2) + 1;

        tmpstr = (char *)mem_alloc(tmpstr_len,LEAK_SPACE);
        strcpy(tmpstr, str1);
        strcat(tmpstr, str2);
        str1 = string_find(tmpstr, 1);
        mem_dealloc(tmpstr,tmpstr_len,LEAK_SPACE);
        return atom_unify(CTXTc makestring(str1), ptoc_tag(CTXTc 3));
    } else return FALSE;
}
Exemple #16
0
cst_val *cst_args(char **argv, int argc,
		  const char *description,
		  cst_features *args)
{
    /* parses the given arguments wrt the description */
    cst_features *op_types = new_features();
    cst_val *files = NULL;
    int i;
    const char *type;
 
    parse_description(description,op_types);

    for (i=1; i<argc; i++)
    {
	if (argv[i][0] == '-')
	{
	    if ((!feat_present(op_types,argv[i])) ||
		(cst_streq("-h",argv[i])) ||
		(cst_streq("-?",argv[i])) ||
		(cst_streq("--help",argv[i])) ||
		(cst_streq("-help",argv[i])))
		parse_usage(argv[0],"","",description);
	    else
	    {
		type = feat_string(op_types,argv[i]);
		if (cst_streq("<binary>",type))
		    feat_set_string(args,argv[i],"true");
		else
		{
		    if (i+1 == argc)
			parse_usage(argv[0],
				    "missing argument for ",argv[i],
				    description);
		    if (cst_streq("<int>",type))
			feat_set_int(args,argv[i],atoi(argv[i+1]));
		    else if (cst_streq("<float>",type))
			feat_set_float(args,argv[i],atof(argv[i+1]));
		    else if (cst_streq("<string>",type))
			feat_set_string(args,argv[i],argv[i+1]);
		    else
			parse_usage(argv[0],
				    "unknown arg type ",type,
				    description);
		    i++;
		}
	    }
	}
	else
	    files = cons_val(string_val(argv[i]),files);
    }
    delete_features(op_types);

    return val_reverse(files);
}
Exemple #17
0
cst_val* ustring32_lts_apply(const ustring32_t u32,const cst_lts_rewrites *rule)
{
  size_t n=ustring32_length(u32);
  if(n==0) return NULL;
  cst_val *l=cons_val(string_val("#"),NULL);
  uint8_t b[8];
  size_t i=n;
  int k;
  do
    {
      i--;
      k=u8_uctomb(b,ustring32_at(u32,i),sizeof(b));
      b[k]='\0';
      l=cons_val(string_val((char*)b),l);
    }
  while(i);
  l=cons_val(string_val("#"),l);
  cst_val *output=lts_rewrites(l, rule);
  delete_val(l);
  return output;
}
cst_val *en_exp_id(const char *numstring)
{
    /* Expand numstring as pairs as in years or ids */
    char aaa[3];

    if ((strlen(numstring) == 4) && 
	(numstring[2] == '0') &&
	(numstring[3] == '0'))
    {
	if (numstring[1] == '0')
	    return en_exp_number(numstring); /* 2000, 3000 */
	else
	{
	    aaa[0] = numstring[0];
	    aaa[1] = numstring[1];
	    aaa[2] = '\0';
	    return val_append(en_exp_number(aaa),
			      cons_val(string_val("hundred"),0));
	}
    }
    else if ((strlen(numstring) == 2) && (numstring[0] == '0'))
	return cons_val(string_val("oh"),
			en_exp_digits(&numstring[1]));
    else if (((strlen(numstring) == 4) && 
	 ((numstring[1] == '0'))) ||
	(strlen(numstring) < 3))
	return en_exp_number(numstring);
    else if (strlen(numstring)%2 == 1)
    {
	return cons_val(string_val(digit2num[numstring[0]-'0']),
			en_exp_id(&numstring[1]));
    }
    else 
    {
	aaa[0] = numstring[0];
	aaa[1] = numstring[1];
	aaa[2] = '\0';
	return val_append(en_exp_number(aaa),en_exp_id(&numstring[2]));
    }
}
cst_val *lts_rewrites_word(const char *word, const cst_lts_rewrites *r)
{
    cst_val *w, *p;
    char x[2];
    int i;

    x[1] = '\0';
    w = cons_val(string_val("#"),NULL);
    for (i=0; word[i]; i++)
    {
	x[0] = word[i];
	w = cons_val(string_val(x),w);
    }
    w = cons_val(string_val("#"),w);

    w = val_reverse(w);

    p = lts_rewrites(w,r);

    delete_val(w);

    return p;
}
Exemple #20
0
Pair link_sym(Psc psc, Psc mod_psc)
{
    Pair *search_ptr, found_pair;
    char *name;
    byte arity, global_flag, type;

    SYS_MUTEX_LOCK_NOERROR( MUTEX_SYMBOL ) ;
    name = get_name(psc);
    arity = get_arity(psc);
    if ( (global_flag = is_globalmod(mod_psc)) ) {
      search_ptr = (Pair *)symbol_table.table +
	           hash(name, arity, symbol_table.size);
    } else
      search_ptr = (Pair *)&get_data(mod_psc);
    if ((found_pair = search(arity, name, search_ptr))) {
      if (pair_psc(found_pair) != psc) {
	/*
	 *  Invalidate the old name!! It is no longer accessible
	 *  through the global chain.
	 */
	type = get_type(pair_psc(found_pair));
	if ( type != T_ORDI ) {
	  char message[220], modmsg[200];
	  if (type == T_DYNA || type == T_PRED) {
	    Psc mod_psc;
	    mod_psc = (Psc) get_data(pair_psc(found_pair));
	    if (mod_psc == 0) snprintf(modmsg,200,"%s","usermod");
	    else if (isstring(mod_psc)) snprintf(modmsg,200,"usermod from file: %s",string_val(mod_psc));
	    else snprintf(modmsg,200,"module: %s",get_name(mod_psc));
	    snprintf(message,220,
		    "%s/%d (type %d) had been defined in %s",
		     name, arity, type, 
		     modmsg);
	  } else 
	    snprintf(message,220,
		    "%s/%d (type %d) had been defined in another module!",
		    name, arity, type);
	  xsb_warn(message);
	}
	pair_psc(found_pair) = psc;
      }
    }
    else {
      found_pair = make_psc_pair(psc, search_ptr);
      if (global_flag)
	symbol_table_increment_and_check_for_overflow;
    }
    SYS_MUTEX_UNLOCK_NOERROR( MUTEX_SYMBOL ) ;
    return found_pair;
} /* link_sym */
Exemple #21
0
cst_val *en_exp_ordinal(const char *rawnumstring)
{
    /* return ordinal for digit string */
    cst_val *card, *o;
    const cst_val *t;
    const char *l;
    const char *ord;
    char *numstring;
    int i,j;

    numstring = cst_strdup(rawnumstring);
    for (j=i=0; i < cst_strlen(rawnumstring); i++)
	if (rawnumstring[i] != ',')
	{
	    numstring[j] = rawnumstring[i];
	    j++;
	}
    numstring[j] = '\0';
    card = val_reverse(en_exp_number(numstring));
    cst_free(numstring);

    l = val_string(val_car(card));
    ord = 0;
    for (i=0; i<10; i++)
	if (cst_streq(l,digit2num[i]))
	    ord = ord2num[i];
    if (!ord)
	for (i=0; i<10; i++)
	    if (cst_streq(l,digit2teen[i]))
		ord = ord2teen[i];
    if (!ord)
	for (i=0; i<10; i++)
	    if (cst_streq(l,digit2enty[i]))
		ord = ord2enty[i];
    if (cst_streq(l,"hundred"))
	ord = "hundredth";
    if (cst_streq(l,"thousand"))
	ord = "thousandth";
    if (cst_streq(l,"billion"))
	ord = "billtionth";
    if (!ord)  /* dunno, so don't convert anything */
	return card;
    o = cons_val(string_val(ord),0);
    for (t=val_cdr(card); t; t=val_cdr(t))
	o = cons_val(val_car(t),o);
    delete_val(card);
    return o;
}
Exemple #22
0
void ItemLocation::ToItemJson(rapidjson::Value *root_ptr, rapidjson_allocator &alloc) {
    auto &root = *root_ptr;
    rapidjson::Value string_val(rapidjson::kStringType);
    root.AddMember("_type", static_cast<int>(type_), alloc);
    if (type_ == ItemLocationType::STASH) {
        root.AddMember("_tab", tab_id_, alloc);
        string_val.SetString(tab_label_.c_str(), alloc);
        root.AddMember("_tab_label", string_val, alloc);
    } else {
        string_val.SetString(character_.c_str(), alloc);
        root.AddMember("_character", string_val, alloc);
    }
    if (socketed_) {
        root.AddMember("_x", x_, alloc);
        root.AddMember("_y", y_, alloc);
    }
    root.AddMember("_socketed", socketed_, alloc);
}
int in_reg2_list(CTXTdeclc Psc psc) {
  Cell list,term;

  list = reg[2];
  XSB_Deref(list);
  if (isnil(list)) return TRUE; /* if filter is empty, return all */
  while (!isnil(list)) {
    term = get_list_head(list);
    XSB_Deref(term);
    if (isconstr(term)) {
      if (psc == get_str_psc(term)) return TRUE;
    } else if (isstring(term)) {
      if (get_name(psc) == string_val(term)) return TRUE;
    }
    list = get_list_tail(list);
  }
  return FALSE;
}
Exemple #24
0
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    /* return a list of utf-8 characters as strings */
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, l=0;
    char utf8char[5];
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
    for (i=0; xxx[i]; i++)
    {
        if (xxx[i] < 0x80)  /* one byte */
        {
            sprintf(utf8char,"%c",xxx[i]);
            l = 1;
        }
        else if (xxx[i] < 0xe0) /* two bytes */
        {
            sprintf(utf8char,"%c%c",xxx[i],xxx[i+1]);
            i++;
            l = 2;
        }
        else if (xxx[i] < 0xff) /* three bytes */
        {
            sprintf(utf8char,"%c%c%c",xxx[i],xxx[i+1],xxx[i+2]);
            i++; i++;
            l = 3;
        }
        else
        {
            sprintf(utf8char,"%c%c%c%c",xxx[i],xxx[i+1],xxx[i+2],xxx[i+3]);
            i++; i++; i++;
            l = 4;
        }
        chars = cons_val(string_val(utf8char),chars);
    }
    return val_reverse(chars);
#endif
}
int sprintTrieSymbol(char * buffer, Cell symbol) {
  int ctr;

  if ( symbol == ESCAPE_NODE_SYMBOL )
    return sprintf(buffer, "%lu [ESCAPE_NODE_SYMBOL]", ESCAPE_NODE_SYMBOL);
  else {
    switch(TrieSymbolType(symbol)) {
    case XSB_INT:
      return sprintf(buffer, IntegerFormatString, int_val(symbol));
      break;
    case XSB_FLOAT:
      return sprintf(buffer, "%f", float_val(symbol));
      break;
    case XSB_STRING:
      return sprintf(buffer, "%s", string_val(symbol));
      break;
    case XSB_TrieVar:
      return sprintf(buffer, "_V" IntegerFormatString, DecodeTrieVar(symbol));
      break;
    case XSB_STRUCT:
      {
	Psc psc;
	if (isboxedfloat(symbol))
          {
              return sprintf(buffer, "%lf", boxedfloat_val(symbol));
              break;              
          }
	psc = DecodeTrieFunctor(symbol);
	ctr = sprint_quotedname(buffer, 0, get_name(psc));
	return sprintf(buffer+ctr, "/%d", get_arity(psc));
      }
      break;
    case XSB_LIST:
      return sprintf(buffer, "LIST");
      break;
    default:
      return sprintf(buffer, "Unknown symbol (tag = %" Intfmt")", cell_tag(symbol));
      break;
    }
  }
}
cst_val *cmu_grapheme_lex_lts_function(const struct lexicon_struct *l, 
                                       const char *word, const char *pos,
                                       const cst_features *feats)
{
    cst_val *phones = 0;
    cst_val *utflets = 0;
    const cst_val *v;
    char ord[10];
    int i,phindex;

    /* string to utf8 chars */
    utflets = cst_utf8_explode(word);

    for (v=utflets; v; v=val_cdr(v))
    {
        /* We will add the found phones in reverse order and reverse then */
        /* afterwards */
        cst_utf8_as_hex(val_string(val_car(v)),ord);
        phindex = cst_find_u2sampa(ord);
        if (phindex < 0)
            printf("awb_debug no sampa %s %s\n",val_string(val_car(v)),ord);
        for (i=4; (phindex>=0) && (i>0); i--)
        {
            if (unicode_sampa_mapping[phindex][i])
                phones = cons_val(string_val(unicode_sampa_mapping[phindex][i]),
                                  phones);
        }
    }

    phones = val_reverse(phones);
#if 1
    printf("cmu_grapheme_lex.c: word \"%s\" ",word);
    val_print(stdout,phones);
    printf("\n");
#endif

    delete_val(utflets);

    return phones;
}
static int make_flags(prolog_term flag_term, char *context)
{
  int flags = 0;
  prolog_term aux_list=flag_term, head_trm;
  char *head;

#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  if (is_var(flag_term))
    return REG_EXTENDED;
  else if (is_int(flag_term))
    return (REG_EXTENDED | REG_ICASE);

  if (is_nil(flag_term))
    return 0; /* basic, case-sensitive */

  if (! is_list(flag_term))
    xsb_abort("[%s] Arg 4 (flags) must be a variable, an integer, or a list",
	      context);

  do {
    head_trm = p2p_car(aux_list);
    aux_list = p2p_cdr(aux_list);
    if (!is_string(head_trm))
      xsb_abort("[%s] Arg 4: allowed flags are `extended' and `ignorecase'",
		context);
    head = string_val(head_trm);
    if (strcmp(head,"extended")==0)
      flags = flags | REG_EXTENDED;
    else if (strcmp(head,"ignorecase")==0)
      flags = flags | REG_ICASE;
  } while (!is_nil(aux_list));
  
  return flags;
}
Exemple #28
0
cst_val *cst_utf8_explode(const cst_string *utf8string)
{
    /* return a list of utf-8 characters as strings */
    const unsigned char *xxx = (const unsigned char *)utf8string;
    cst_val *chars=NULL;
    int i, l=0;
    char utf8char[5];

    for (i=0; xxx[i]; i++)
    {
        if (xxx[i] < 0x80)  /* one byte */
        {
            sprintf(utf8char,"%c",xxx[i]);
            l = 1;
        }
        else if (xxx[i] < 0xe0) /* two bytes */
        {
            sprintf(utf8char,"%c%c",xxx[i],xxx[i+1]);
            i++;
            l = 2;
        }
        else if (xxx[i] < 0xff) /* three bytes */
        {
            sprintf(utf8char,"%c%c%c",xxx[i],xxx[i+1],xxx[i+2]);
            i++; i++;
            l = 3;
        }
        else
        {
            sprintf(utf8char,"%c%c%c%c",xxx[i],xxx[i+1],xxx[i+2],xxx[i+3]);
            i++; i++; i++;
            l = 4;
        }
        chars = cons_val(string_val(utf8char),chars);
    }
    return val_reverse(chars);

}
/* XSB string substitution entry point
   In: 
       Arg1: string
       Arg2: beginning offset
       Arg3: ending offset. < 0 means end of string
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
int do_regsubstring__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term beg_offset_term, end_offset_term;
  char *input_string=NULL;    /* string where matches are to be found */
  int beg_offset, end_offset, input_len, substring_len;
  int conversion_required=FALSE;
  
  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_SUBSTRING", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[RE_SUBSTRING] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: beginning offset */
  beg_offset_term = reg_term(CTXTc 2);
  if (! is_int(beg_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 2 (the beginning offset) must be an integer");
  beg_offset = int_val(beg_offset_term);
  if (beg_offset < 0 || beg_offset > input_len)
    xsb_abort("[RE_SUBSTRING] Arg 2 (=%d) must be between 0 and %d",  
	      beg_offset, input_len);

  /* arg 3: ending offset */
  end_offset_term = reg_term(CTXTc 3);
  if (! is_int(end_offset_term))
    xsb_abort("[RE_SUBSTRING] Arg 3 (the ending offset) must be an integer");
  end_offset = int_val(end_offset_term);
  if (end_offset < 0)
    end_offset = input_len;
  else if (end_offset > input_len || end_offset < beg_offset)
    xsb_abort("[RE_SUBSTRING] Arg 3 (=%d) must be < 0 or between %d and %d",
	      end_offset, beg_offset, input_len);

  output_term = reg_term(CTXTc 4);
  if (! is_var(output_term))
    xsb_abort("[RE_SUBSTRING] Arg 4 (the output string) must be an unbound variable");

  /* do the actual replacement */
  substring_len = end_offset-beg_offset;
  XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len);
  XSB_StrNullTerminate(&output_buffer);
  
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4,
			   "RE_SUBSTITUTE", "Arg 4");
  else
    /* DO NOT intern. When atom table garbage collection is in place, then
       replace the instruction with this:
       	   c2p_string(output_buffer, output_term);
       The reason for not interning is that in Web page
       manipulation it is often necessary to process the same string many
       times. This can cause atom table overflow. Not interning allws us to
       circumvent the problem.  */
    ctop_string(CTXTc 4, output_buffer.string);
  
  return(TRUE);
}
/* XSB string substitution entry point: replace substrings specified in Arg2
   with strings in Arg3.
   In: 
       Arg1: string
       Arg2: substring specification, a list [s(B1,E1),s(B2,E2),...]
       Arg3: list of replacement string
   Out:
       Arg4: new (output) string
   Always succeeds, unless error.
*/
int do_regsubstitute__(void)
{
#ifdef MULTI_THREAD
  if( NULL == th)
	th = xsb_get_main_thread();
#endif

  /* Prolog args are first assigned to these, so we could examine the types
     of these objects to determine if we got strings or atoms. */
  prolog_term input_term, output_term;
  prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1;
  prolog_term subst_str_term=(prolog_term)0,
    subst_str_list_term, subst_str_list_term1;
  char *input_string=NULL;    /* string where matches are to be found */
  char *subst_string=NULL;
  prolog_term beg_term, end_term;
  int beg_offset=0, end_offset=0, input_len;
  int last_pos = 0; /* last scanned pos in input string */
  /* the output buffer is made large enough to include the input string and the
     substitution string. */
  int conversion_required=FALSE; /* from C string to Prolog char list */
  
  XSB_StrSet(&output_buffer,"");

  input_term = reg_term(CTXTc 1);  /* Arg1: string to find matches in */
  if (is_string(input_term)) /* check it */
    input_string = string_val(input_term);
  else if (is_list(input_term)) {
    input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
					  "RE_SUBSTITUTE", "input string");
    conversion_required = TRUE;
  } else
    xsb_abort("[RE_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list");

  input_len = strlen(input_string);

  /* arg 2: substring specification */
  subst_spec_list_term = reg_term(CTXTc 2);
  if (!is_list(subst_spec_list_term) && !is_nil(subst_spec_list_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]");

  /* handle substitution string */
  subst_str_list_term = reg_term(CTXTc 3);
  if (! is_list(subst_str_list_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings");

  output_term = reg_term(CTXTc 4);
  if (! is_var(output_term))
    xsb_abort("[RE_SUBSTITUTE] Arg 4 (the output) must be an unbound variable");

  subst_spec_list_term1 = subst_spec_list_term;
  subst_str_list_term1 = subst_str_list_term;

  if (is_nil(subst_spec_list_term1)) {
    XSB_StrSet(&output_buffer, input_string);
    goto EXIT;
  }
  if (is_nil(subst_str_list_term1))
    xsb_abort("[RE_SUBSTITUTE] Arg 3 must not be an empty list");

  do {
    subst_reg_term = p2p_car(subst_spec_list_term1);
    subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1);

    if (!is_nil(subst_str_list_term1)) {
      subst_str_term = p2p_car(subst_str_list_term1);
      subst_str_list_term1 = p2p_cdr(subst_str_list_term1);

      if (is_string(subst_str_term)) {
	subst_string = string_val(subst_str_term);
      } else if (is_list(subst_str_term)) {
	subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf,
					      "RE_SUBSTITUTE",
					      "substitution string");
      } else 
	xsb_abort("[RE_SUBSTITUTE] Arg 3 must be a list of strings");
    }

    beg_term = p2p_arg(subst_reg_term,1);
    end_term = p2p_arg(subst_reg_term,2);

    if (!is_int(beg_term) || !is_int(end_term))
      xsb_abort("[RE_SUBSTITUTE] Non-integer in Arg 2");
    else{
      beg_offset = int_val(beg_term);
      end_offset = int_val(end_term);
    }
    /* -1 means end of string */
    if (end_offset < 0)
      end_offset = input_len;
    if ((end_offset < beg_offset) || (beg_offset < last_pos))
      xsb_abort("[RE_SUBSTITUTE] Substitution regions in Arg 2 not sorted");

    /* do the actual replacement */
    XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos);
    XSB_StrAppend(&output_buffer, subst_string);
    
    last_pos = end_offset;

  } while (!is_nil(subst_spec_list_term1));

  XSB_StrAppend(&output_buffer, input_string+end_offset);

 EXIT:
  /* get result out */
  if (conversion_required)
    c_string_to_p_charlist(CTXTc output_buffer.string, output_term, 4,
			   "RE_SUBSTITUTE", "Arg 4");
  else
    /* DO NOT intern. When atom table garbage collection is in place, then
       replace the instruction with this:
       	   c2p_string(output_buffer, output_term);
       The reason for not interning is that in Web page
       manipulation it is often necessary to process the same string many
       times. This can cause atom table overflow. Not interning allws us to
       circumvent the problem.  */
    ctop_string(CTXTc 4, output_buffer.string);
  
  return(TRUE);
}