Beispiel #1
0
*/ RL_API REBYTE *RL_Word_String(u32 word)
/*
**	Return a string related to a given global word identifier.
**
**	Returns:
**		A newly allocated, null terminated copy of the word string,
**		or 0 when the identifier is out of range or the copy could
**		not be allocated.
**	Arguments:
**		word - a global word identifier
**	Notes:
**		The result is a null terminated copy of the name for your own use.
**		The string is always UTF-8 encoded (chars > 127 are encoded.)
**		In this API, word identifiers are always canonical. Therefore,
**		the returned string may have different spelling/casing than expected.
**		The string is allocated with OS_ALLOC and you can OS_FREE it any time.
**
***********************************************************************/
{
	REBYTE *name;
	REBYTE *copy;

	// !!This code should use a function from c-words.c (but nothing perfect yet.)

	// Identifier 0 and anything at or past the table tail are rejected.
	if (word == 0 || word >= PG_Word_Table.series->tail) return 0;

	name = VAL_SYM_NAME(BLK_SKIP(PG_Word_Table.series, word));

	// Never copy into a null buffer: report a failed allocation with the
	// same 0 result that an invalid identifier produces.
	copy = OS_ALLOC_ARRAY(REBYTE, LEN_BYTES(name) + 1);
	if (!copy) return 0;

	// The +1 carries the terminating null along with the name bytes.
	COPY_BYTES(copy, name, LEN_BYTES(name) + 1);
	return copy;
}
Beispiel #2
0
*/	REBYTE *Get_Sym_Name(REBCNT num)
/*
**		Return the name stored in the word table for symbol number
**		num. The table's own string is returned (no copy is made).
**		For num == 0, or num at or beyond the table tail, the
**		placeholder string "???" is returned instead.
**
***********************************************************************/
{
	// Valid symbol numbers run from 1 up to (but excluding) the tail.
	if (num != 0 && num < PG_Word_Table.series->tail)
		return VAL_SYM_NAME(BLK_SKIP(PG_Word_Table.series, num));

	return (REBYTE*)"???";
}
Beispiel #3
0
*/	static void Expand_Word_Table(void)
/*
**		Grow the hash part of the word table to the next larger
**		size (via Expand_Hash, which also disposes of the old hash
**		array) and then re-insert every symbol currently held in
**		the symbol part of the table.
**
**		A slot value of 0 means "empty", so symbol indices start
**		at 1 and index 0 is never stored.
**
***********************************************************************/
{
	REBCNT *slots;
	REBVAL *sym;
	REBCNT nslots;
	REBCNT idx;
	REBINT pos;
	REBINT step;

	// Switch over to the larger hash array:
	Expand_Hash(PG_Word_Table.hashes);

	slots  = (REBCNT *)PG_Word_Table.hashes->data;
	nslots = PG_Word_Table.hashes->tail;

	// Walk the symbols in index order and place each one again:
	sym = BLK_SKIP(PG_Word_Table.series, 1);
	idx = 1;
	while (idx < PG_Word_Table.series->tail) {
		pos = Hash_Word(VAL_SYM_NAME(sym), -1);

		// Probe stride comes from the low 16 bits and must not be zero,
		// otherwise the probe below would never advance.
		step = (pos & 0x0000FFFF) % nslots;
		if (step == 0) step = 1;

		// Starting slot comes from the masked middle bits of the hash.
		pos = (pos & 0x00FFFF00) % nslots;

		// Step (with wrap-around) until a free slot turns up.
		for (; slots[pos] != 0; ) {
			pos += step;
			if (pos >= (REBINT)nslots) pos -= nslots;
		}

		slots[pos] = idx;

		idx++;
		sym++;
	}
}
Beispiel #4
0
//
//  RL_Word_String: C
// 
// Return a string related to a given global word identifier.
// 
// Returns:
//     A newly allocated, null terminated copy of the word string,
//     or 0 when the identifier is out of range or the copy could
//     not be allocated.
// Arguments:
//     word - a global word identifier
// Notes:
//     The result is a null terminated copy of the name for your own use.
//     The string is always UTF-8 encoded (chars > 127 are encoded.)
//     In this API, word identifiers are always canonical. Therefore,
//     the returned string may have different spelling/casing than expected.
//     The string is allocated with OS_ALLOC and you can OS_FREE it any time.
//
RL_API REBYTE *RL_Word_String(u32 word)
{
    REBYTE *name;
    REBYTE *copy;

    // !!This code should use a function from c-words.c (but nothing perfect yet.)

    // Identifier 0 and anything at or past the array length are rejected.
    if (word == 0 || word >= ARR_LEN(PG_Word_Table.array)) return 0;

    name = VAL_SYM_NAME(ARR_AT(PG_Word_Table.array, word));

    // Never copy into a null buffer: report a failed allocation with the
    // same 0 result that an invalid identifier produces.
    copy = OS_ALLOC_N(REBYTE, LEN_BYTES(name) + 1);
    if (!copy) return 0;

    // The +1 carries the terminating null along with the name bytes.
    COPY_BYTES(copy, name, LEN_BYTES(name) + 1);
    return copy;
}
Beispiel #5
0
xx*/	void Dump_Word_Table()
/*
**		Debugging aid: emit one Debug_Fmt line for every word table
**		entry from index 1 up to the tail, showing the index, the
**		symbol name, and its VAL_SYM_CANON and VAL_SYM_ALIAS values.
**
***********************************************************************/
{
	REBVAL *table = BLK_HEAD(PG_Word_Table.series);
	REBVAL *entry;
	REBCNT	idx = 1;

	// Entry 0 is skipped; listing starts at the first real symbol.
	while (idx < PG_Word_Table.series->tail) {
		entry = table + idx;
		Debug_Fmt(
			"%03d: %s = %d (%d)",
			idx,
			VAL_SYM_NAME(entry),
			VAL_SYM_CANON(entry),
			VAL_SYM_ALIAS(entry)
		);
		idx++;
	}
}
Beispiel #6
0
*/	REBCNT Make_Word(REBYTE *str, REBCNT len)
/*
**		Given a string and its length, compute its hash value,
**		search for a match, and if not found, add it to the table.
**		Length of zero indicates you provided a zero terminated string.
**		Return the table index for the word (whether found or new).
**
**		Arguments:
**			str - spelling of the word to find or add
**			len - length of str, or 0 to have LEN_BYTES measure it
**
**		Notes:
**			Table growth is checked before the search, so even a call
**			that ends up finding an existing word may expand the hash
**			table or extend the symbol series and Bind_Table.
**			When a new entry is added, the tails of both the word
**			table series and Bind_Table are advanced by one.
**
***********************************************************************/
{
	REBINT	hash;		// raw hash first, then the current probe slot
	REBINT	size;		// number of slots in the hash array
	REBINT	skip;		// probe stride (never zero)
	REBINT	n;			// compare result while searching, new index afterwards
	REBCNT	h;			// symbol index read from the hash slot / alias chain
	REBCNT	*hashes;
	REBVAL  *words;
	REBVAL  *w;			// the entry being created

	//REBYTE *sss = Get_Sym_Name(1);	// (Debugging method)

	if (len == 0) len = LEN_BYTES(str);

	// If hash part of word table is too dense, expand it:
	// (dense = more symbols than half the number of hash slots)
	if (PG_Word_Table.series->tail > PG_Word_Table.hashes->tail/2)
		Expand_Word_Table();

	// The word table and Bind_Table are expected to have the same tail;
	// they are extended and advanced together below.
	ASSERT((SERIES_TAIL(PG_Word_Table.series) == SERIES_TAIL(Bind_Table)), RP_BIND_TABLE_SIZE);

	// If word symbol part of word table is full, expand it:
	if (SERIES_FULL(PG_Word_Table.series)) {
		Extend_Series(PG_Word_Table.series, 256);
	}
	if (SERIES_FULL(Bind_Table)) {
		Extend_Series(Bind_Table, 256);
		CLEAR_SERIES(Bind_Table);
	}

	// These are read only after the growth steps above.
	// NOTE(review): presumably because growing can relocate the data - confirm.
	size   = (REBINT)PG_Word_Table.hashes->tail;
	words  = BLK_HEAD(PG_Word_Table.series);
	hashes = (REBCNT *)PG_Word_Table.hashes->data;

	// Hash the word, including a skip factor for lookup:
	// the stride comes from the low 16 bits (forced to be non-zero so the
	// probe always advances), the first slot from the masked middle bits.
	hash  = Hash_Word(str, len);
	skip  = (hash & 0x0000FFFF) % size;
	if (skip == 0) skip = 1;
	hash = (hash & 0x00FFFF00) % size;
	//Debug_Fmt("%s hash %d skip %d", str, hash, skip);

	// Search hash table for word match:
	// An empty slot (0) ends the probe with h == 0, meaning no such word.
	while (NZ(h = hashes[hash])) {
		// A non-negative compare result keeps us on this entry's alias
		// chain: 0 is an exact match, anything else walks to the next alias.
		// NOTE(review): a positive result presumably means "same word,
		// different spelling/casing" - confirm against Compare_UTF8.
		while ((n = Compare_UTF8(VAL_SYM_NAME(words+h), str, len)) >= 0) {
			//if (Match_String("script", str, len))
			//	Debug_Fmt("---- %s %d %d\n", VAL_SYM_NAME(&words[h]), n, h);
			if (n == 0) return h; // direct hit
			if (VAL_SYM_ALIAS(words+h)) h = VAL_SYM_ALIAS(words+h);
			else goto make_sym; // Create new alias for word
		}
		// Negative compare result: not this word, try the next probe slot
		// (wrapping around the end of the hash array).
		hash += skip;
		if (hash >= size) hash -= size;
	}

make_sym:
	// The new entry goes at the current tail of the symbol series.
	n = PG_Word_Table.series->tail;
	w = words + n;
	if (h) {
		// Alias word: h is the entry at the end of the alias chain that was
		// walked (the first entry itself when it had no alias yet). Link the
		// new entry after it and inherit its canon index.
		VAL_SYM_ALIAS(words+h) = n;
		VAL_SYM_CANON(w) = VAL_SYM_CANON(words+h);
	} else {
		// Canon (base version of) word (h == 0): claim the empty hash slot
		// the probe stopped on; the entry is its own canon.
		hashes[hash] = n;
		VAL_SYM_CANON(w) = n;
	}
	VAL_SYM_ALIAS(w) = 0;	// nothing aliases the new entry yet
	VAL_SYM_NINDEX(w) = Make_Word_Name(str, len);
	VAL_SET(w, REB_HANDLE);

	// These are allowed because of the SERIES_FULL checks above which
	// add one extra to the TAIL check comparison. However, their
	// termination values (nulls) will be missing.
	PG_Word_Table.series->tail++;
	Bind_Table->tail++;

	return n;
}