*/ RL_API REBYTE *RL_Word_String(u32 word) /* ** Return a string related to a given global word identifier. ** ** Returns: ** A copy of the word string, null terminated. ** Arguments: ** word - a global word identifier ** Notes: ** The result is a null terminated copy of the name for your own use. ** The string is always UTF-8 encoded (chars > 127 are encoded.) ** In this API, word identifiers are always canonical. Therefore, ** the returned string may have different spelling/casing than expected. ** The string is allocated with OS_ALLOC and you can OS_FREE it any time. ** ***********************************************************************/ { REBYTE *s1, *s2; // !!This code should use a function from c-words.c (but nothing perfect yet.) if (word == 0 || word >= PG_Word_Table.series->tail) return 0; s1 = VAL_SYM_NAME(BLK_SKIP(PG_Word_Table.series, word)); s2 = OS_ALLOC_ARRAY(REBYTE, LEN_BYTES(s1) + 1); COPY_BYTES(s2, s1, LEN_BYTES(s1) + 1); return s2; }
*/ REBYTE *Get_Sym_Name(REBCNT num) /* ***********************************************************************/ { if (num == 0 || num >= PG_Word_Table.series->tail) return (REBYTE*)"???"; return VAL_SYM_NAME(BLK_SKIP(PG_Word_Table.series, num)); }
*/ static void Expand_Word_Table(void) /* ** Expand the hash table part of the word_table by allocating ** the next larger table size and rehashing all the words of ** the current table. Free the old hash array. ** ***********************************************************************/ { REBCNT *hashes; REBVAL *word; REBINT hash; REBCNT size; REBINT skip; REBCNT n; // Allocate a new hash table: Expand_Hash(PG_Word_Table.hashes); // Debug_Fmt("WORD-TABLE: expanded (%d symbols, %d slots)", PG_Word_Table.series->tail, PG_Word_Table.hashes->tail); // Rehash all the symbols: word = BLK_SKIP(PG_Word_Table.series, 1); hashes = (REBCNT *)PG_Word_Table.hashes->data; size = PG_Word_Table.hashes->tail; for (n = 1; n < PG_Word_Table.series->tail; n++, word++) { hash = Hash_Word(VAL_SYM_NAME(word), -1); skip = (hash & 0x0000FFFF) % size; if (skip == 0) skip = 1; hash = (hash & 0x00FFFF00) % size; while (hashes[hash]) { hash += skip; if (hash >= (REBINT)size) hash -= size; } hashes[hash] = n; } }
// // RL_Word_String: C // // Return a string related to a given global word identifier. // // Returns: // A copy of the word string, null terminated. // Arguments: // word - a global word identifier // Notes: // The result is a null terminated copy of the name for your own use. // The string is always UTF-8 encoded (chars > 127 are encoded.) // In this API, word identifiers are always canonical. Therefore, // the returned string may have different spelling/casing than expected. // The string is allocated with OS_ALLOC and you can OS_FREE it any time. // RL_API REBYTE *RL_Word_String(u32 word) { REBYTE *s1, *s2; // !!This code should use a function from c-words.c (but nothing perfect yet.) if (word == 0 || word >= ARR_LEN(PG_Word_Table.array)) return 0; s1 = VAL_SYM_NAME(ARR_AT(PG_Word_Table.array, word)); s2 = OS_ALLOC_N(REBYTE, LEN_BYTES(s1) + 1); COPY_BYTES(s2, s1, LEN_BYTES(s1) + 1); return s2; }
xx*/ void Dump_Word_Table() /* ***********************************************************************/ { REBCNT n; REBVAL *words = BLK_HEAD(PG_Word_Table.series); for (n = 1; n < PG_Word_Table.series->tail; n++) { Debug_Fmt("%03d: %s = %d (%d)", n, VAL_SYM_NAME(words+n), VAL_SYM_CANON(words+n), VAL_SYM_ALIAS(words+n)); // if ((n % 40) == 0) getchar(); } }
/***********************************************************************
**
*/  REBCNT Make_Word(REBYTE *str, REBCNT len)
/*
**  Given a string and its length, compute its hash value,
**  search for a match, and if not found, add it to the table.
**  Length of zero indicates you provided a zero terminated string.
**  Return the table index for the word (whether found or new).
**
**  Arguments:
**      str - bytes of the word's name
**      len - number of bytes in str, or 0 to have it measured
**            with LEN_BYTES
**
**  Side effects:
**      May expand the hash table, and may extend both the symbol
**      series and Bind_Table (which is cleared when extended).
**      When a new symbol is added, the tails of the symbol series
**      and of Bind_Table are each advanced by one.
**
***********************************************************************/
{
    REBINT hash;
    REBINT size;
    REBINT skip;
    REBINT n;
    REBCNT h;
    REBCNT *hashes;
    REBVAL *words;
    REBVAL *w;

    //REBYTE *sss = Get_Sym_Name(1);  // (Debugging method)

    if (len == 0) len = LEN_BYTES(str);

    // If hash part of word table is too dense, expand it:
    // (dense = more symbols than half the number of hash slots)
    if (PG_Word_Table.series->tail > PG_Word_Table.hashes->tail/2)
        Expand_Word_Table();

    // The symbol series and Bind_Table must have the same tail; both
    // tails are advanced together at the end of this function.
    ASSERT((SERIES_TAIL(PG_Word_Table.series) == SERIES_TAIL(Bind_Table)), RP_BIND_TABLE_SIZE);

    // If word symbol part of word table is full, expand it:
    if (SERIES_FULL(PG_Word_Table.series)) {
        Extend_Series(PG_Word_Table.series, 256);
    }
    if (SERIES_FULL(Bind_Table)) {
        Extend_Series(Bind_Table, 256);
        CLEAR_SERIES(Bind_Table);
    }

    // Fetch the table pointers only now, after every possible
    // expansion above. NOTE(review): presumably expansion can move the
    // underlying data -- confirm against Extend_Series/Expand_Hash.
    size = (REBINT)PG_Word_Table.hashes->tail;
    words = BLK_HEAD(PG_Word_Table.series);
    hashes = (REBCNT *)PG_Word_Table.hashes->data;

    // Hash the word, including a skip factor for lookup:
    // the low 16 bits of the raw hash give the probe step (forced to be
    // non-zero), bits 8..23 give the starting slot. The step must be
    // derived before `hash` is overwritten with the slot index.
    hash = Hash_Word(str, len);
    skip = (hash & 0x0000FFFF) % size;
    if (skip == 0) skip = 1;
    hash = (hash & 0x00FFFF00) % size;
    //Debug_Fmt("%s hash %d skip %d", str, hash, skip);

    // Search hash table for word match:
    // the outer loop probes slots until an empty one (h == 0) is found;
    // the inner loop walks the alias chain of the symbol in that slot.
    // NOTE(review): Compare_UTF8 is not visible here. As used below,
    // 0 is an exact match, a positive result is a match that still
    // needs an alias (presumably a casing difference), and a negative
    // result is a mismatch -- confirm against its definition.
    while (NZ(h = hashes[hash])) {
        while ((n = Compare_UTF8(VAL_SYM_NAME(words+h), str, len)) >= 0) {
            //if (Match_String("script", str, len))
            //  Debug_Fmt("---- %s %d %d\n", VAL_SYM_NAME(&words[h]), n, h);
            if (n == 0) return h; // direct hit
            if (VAL_SYM_ALIAS(words+h)) h = VAL_SYM_ALIAS(words+h); // try next alias
            else goto make_sym; // Create new alias for word
        }
        // Mismatch: step to the next slot of the probe sequence, with wrap-around.
        hash += skip;
        if (hash >= size) hash -= size;
    }

make_sym:
    // Reached with h == 0 (an empty slot was found: brand new word) or,
    // via the goto, with h naming the last symbol of a matching alias chain.
    n = PG_Word_Table.series->tail;
    w = words + n;
    if (h) {
        // Alias word: h is the tail of the alias chain (its own alias
        // field is zero), so link the new symbol after it and share
        // that chain's canon.
        VAL_SYM_ALIAS(words+h) = n;
        VAL_SYM_CANON(w) = VAL_SYM_CANON(words+h);
    } else {
        // Canon (base version of) word (h == 0): claim the empty hash
        // slot; the new symbol is its own canon.
        hashes[hash] = n;
        VAL_SYM_CANON(w) = n;
    }
    VAL_SYM_ALIAS(w) = 0;
    VAL_SYM_NINDEX(w) = Make_Word_Name(str, len);
    VAL_SET(w, REB_HANDLE);

    // These are allowed because of the SERIES_FULL checks above which
    // add one extra to the TAIL check comparison. However, their
    // termination values (nulls) will be missing.
    PG_Word_Table.series->tail++;
    Bind_Table->tail++;

    return n;
}