// // Rehash_Hash: C // // Recompute the entire hash table. Table must be large enough. // static void Rehash_Hash(REBSER *series) { REBVAL *val; REBCNT n; REBCNT key; REBCNT *hashes; if (!series->extra.series) return; hashes = cast(REBCNT*, series->extra.series->data); val = BLK_HEAD(series); for (n = 0; n < series->tail; n += 2, val += 2) { key = Find_Key(series, series->extra.series, val, 2, 0, 0); hashes[key] = n/2+1; } }
*/  static void Rehash_Hash(REBSER *series)
/*
**      Rebuild the hash table attached to the series from scratch.
**      Every second cell of the array (positions 0, 2, 4, ...) is
**      handed to Find_Key, and the slot it reports is set to that
**      pair's 1-based number (position/2 + 1).
**
**      Does nothing when no hash table is attached. The table must
**      already be large enough; it is never grown here.
**
***********************************************************************/
{
    REBCNT *slots;
    REBVAL *cell;
    REBCNT pos;

    if (!series->extra.series) return; // no hash table attached

    slots = cast(REBCNT*, series->extra.series->data);
    cell = BLK_HEAD(series);

    pos = 0;
    while (pos < series->tail) {
        REBCNT slot = Find_Key(series, series->extra.series, cell, 2, 0, 0);

        slots[slot] = pos / 2 + 1; // 1-based pair number

        pos += 2;
        cell += 2;
    }
}
// // Make_Set_Operation_Series: C // // Do set operations on a series. Case-sensitive if `cased` is TRUE. // `skip` is the record size. // static REBSER *Make_Set_Operation_Series(const REBVAL *val1, const REBVAL *val2, REBCNT flags, REBCNT cased, REBCNT skip) { REBSER *buffer; // buffer for building the return series REBCNT i; REBINT h = TRUE; REBFLG first_pass = TRUE; // are we in the first pass over the series? REBSER *out_ser; // This routine should only be called with SERIES! values assert(ANY_SERIES(val1)); if (val2) { assert(ANY_SERIES(val2)); if (ANY_ARRAY(val1)) { if (!ANY_ARRAY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); // As long as they're both arrays, we're willing to do: // // >> union quote (a b c) 'b/d/e // (a b c d e) // // The type of the result will match the first value. } else if (!IS_BINARY(val1)) { // We will similarly do any two ANY-STRING! types: // // >> union <abc> "bde" // <abcde> if (IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } else { // Binaries only operate with other binaries if (!IS_BINARY(val2)) fail (Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2))); } } // Calculate i as length of result block. i = VAL_LEN(val1); if (flags & SOP_FLAG_BOTH) i += VAL_LEN(val2); if (ANY_ARRAY(val1)) { REBSER *hser = 0; // hash table for series REBSER *hret; // hash table for return series buffer = BUF_EMIT; // use preallocated shared block Resize_Series(buffer, i); hret = Make_Hash_Sequence(i); // allocated // Optimization note: !! // This code could be optimized for small blocks by not hashing them // and extending Find_Key to do a FIND on the value itself w/o the hash. do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! 
// Check what is in series1 but not in series2: if (flags & SOP_FLAG_CHECK) hser = Hash_Block(val2, cased); // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { REBVAL *item = BLK_SKIP(ser, i); if (flags & SOP_FLAG_CHECK) { h = Find_Key(VAL_SERIES(val2), hser, item, skip, cased, 1); h = (h >= 0); if (flags & SOP_FLAG_INVERT) h = !h; } if (h) Find_Key(buffer, hret, item, skip, cased, 2); } if (flags & SOP_FLAG_CHECK) Free_Series(hser); if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); if (hret) Free_Series(hret); out_ser = Copy_Array_Shallow(buffer); RESET_TAIL(buffer); // required - allow reuse } else { if (IS_BINARY(val1)) { // All binaries use "case-sensitive" comparison (e.g. each byte // is treated distinctly) cased = TRUE; } buffer = BUF_MOLD; Reset_Buffer(buffer, i); RESET_TAIL(buffer); do { REBSER *ser = VAL_SERIES(val1); // val1 and val2 swapped 2nd pass! REBUNI uc; // Iterate over first series: i = VAL_INDEX(val1); for (; i < SERIES_TAIL(ser); i += skip) { uc = GET_ANY_CHAR(ser, i); if (flags & SOP_FLAG_CHECK) { h = (NOT_FOUND != Find_Str_Char( VAL_SERIES(val2), 0, VAL_INDEX(val2), VAL_TAIL(val2), skip, uc, cased ? AM_FIND_CASE : 0 )); if (flags & SOP_FLAG_INVERT) h = !h; } if (!h) continue; if ( NOT_FOUND == Find_Str_Char( buffer, 0, 0, SERIES_TAIL(buffer), skip, uc, cased ? AM_FIND_CASE : 0 ) ) { Append_String(buffer, ser, i, skip); } } if (!first_pass) break; first_pass = FALSE; // Iterate over second series? if ((i = ((flags & SOP_FLAG_BOTH) != 0))) { const REBVAL *temp = val1; val1 = val2; val2 = temp; } } while (i); out_ser = Copy_String(buffer, 0, -1); } return out_ser; }
*/  static REBINT Do_Set_Operation(struct Reb_Call *call_, REBCNT flags)
/*
**      Do set operations on a series.
**
**      Argument 1 is the first set. Unless flags is SET_OP_UNIQUE,
**      argument 2 is the second set and must have the same type as
**      the first. The /case and /skip N refinements follow.
**
**      BLOCK!, STRING! and BINARY! build a new series in D_OUT;
**      flag bits SOP_BOTH, SOP_CHECK and SOP_INVERT steer the passes.
**      BITSET! goes through Xandor_Binary (unique returns the
**      argument itself). TYPESET! is combined in place in argument 1.
**      Any other type raises Error_Invalid_Arg.
**
**      Returns R_OUT, or R_ARG1 for typesets and unique bitsets.
**
***********************************************************************/
{
    REBVAL *val;
    REBVAL *val1;
    REBVAL *val2 = 0;
    REBSER *ser;
    REBSER *hser = 0;   // hash table for series
    REBSER *retser;     // return series
    REBSER *hret;       // hash table for return series
    REBCNT i;
    REBINT h = TRUE;    // stays TRUE (keep everything) unless SOP_CHECK
    REBCNT skip = 1;    // record size
    REBCNT cased = 0;   // case sensitive when TRUE

    SET_NONE(D_OUT);
    val1 = D_ARG(1);
    i = 2;

    // Check for second series argument:
    if (flags != SET_OP_UNIQUE) {
        val2 = D_ARG(i++);
        if (VAL_TYPE(val1) != VAL_TYPE(val2))
            raise Error_Unexpected_Type(VAL_TYPE(val1), VAL_TYPE(val2));
    }

    // Refinements /case and /skip N
    cased = D_REF(i++); // cased
    if (D_REF(i++)) skip = Int32s(D_ARG(i), 1);

    switch (VAL_TYPE(val1)) {

    case REB_BLOCK:
        i = VAL_LEN(val1);
        // Setup result block:
        if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

        retser = BUF_EMIT;  // use preallocated shared block
        Resize_Series(retser, i);
        hret = Make_Hash_Sequence(i);   // allocated

        // Optimization note: !!
        // This code could be optimized for small blocks by not hashing them
        // and extending Find_Key to do a FIND on the value itself w/o the hash.

        do {
            // Check what is in series1 but not in series2:
            if (GET_FLAG(flags, SOP_CHECK))
                hser = Hash_Block(val2, cased);

            // Iterate over first series:
            ser = VAL_SERIES(val1);
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                // Form the element pointer only after the bounds test, so
                // it is never computed for an index at or past the tail.
                val = BLK_SKIP(ser, i);

                if (GET_FLAG(flags, SOP_CHECK)) {
                    h = Find_Key(VAL_SERIES(val2), hser, val, skip, cased, 1) >= 0;
                    if (GET_FLAG(flags, SOP_INVERT)) h = !h;
                }
                if (h) Find_Key(retser, hret, val, skip, cased, 2);
            }

            // Iterate over second series?  (`i` doubles as the loop flag)
            if ((i = GET_FLAG(flags, SOP_BOTH))) {
                val = val1;
                val1 = val2;
                val2 = val;
                CLR_FLAG(flags, SOP_BOTH);  // only one swapped pass
            }

            if (GET_FLAG(flags, SOP_CHECK))
                Free_Series(hser);
        } while (i);

        if (hret) Free_Series(hret);

        Val_Init_Block(D_OUT, Copy_Array_Shallow(retser));
        RESET_TAIL(retser); // required - allow reuse

        break;

    case REB_BINARY:
        cased = TRUE;
        SET_TYPE(D_OUT, REB_BINARY);    // remembered for IS_BINARY(D_OUT) below
        // fall through - binaries share the string code

    case REB_STRING:
        i = VAL_LEN(val1);
        // Setup result block:
        if (GET_FLAG(flags, SOP_BOTH)) i += VAL_LEN(val2);

        retser = BUF_MOLD;
        Reset_Buffer(retser, i);
        RESET_TAIL(retser);

        // Translate the TRUE/FALSE setting into the find flag once; the
        // conversion does not depend on the pass.
        cased = cased ? AM_FIND_CASE : 0;

        do {
            REBUNI uc;

            // Iterate over first series:
            ser = VAL_SERIES(val1);
            i = VAL_INDEX(val1);
            for (; i < SERIES_TAIL(ser); i += skip) {
                uc = GET_ANY_CHAR(ser, i);

                if (GET_FLAG(flags, SOP_CHECK)) {
                    h = Find_Str_Char(
                        VAL_SERIES(val2), 0, VAL_INDEX(val2),
                        VAL_TAIL(val2), skip, uc, cased
                    ) != NOT_FOUND;
                    if (GET_FLAG(flags, SOP_INVERT)) h = !h;
                }

                // Only append records whose leading char is not already
                // present in the result buffer
                if (
                    h
                    && (Find_Str_Char(
                        retser, 0, 0, SERIES_TAIL(retser), skip, uc, cased
                    ) == NOT_FOUND)
                ) {
                    Append_String(retser, ser, i, skip);
                }
            }

            // Iterate over second series?  (`i` doubles as the loop flag)
            if ((i = GET_FLAG(flags, SOP_BOTH))) {
                val = val1;
                val1 = val2;
                val2 = val;
                CLR_FLAG(flags, SOP_BOTH);  // only one swapped pass
            }
        } while (i);

        ser = Copy_String(retser, 0, -1);
        if (IS_BINARY(D_OUT))
            Val_Init_Binary(D_OUT, ser);
        else
            Val_Init_String(D_OUT, ser);
        break;

    case REB_BITSET:
        switch (flags) {
        case SET_OP_UNIQUE:
            return R_ARG1;
        case SET_OP_UNION:
            i = A_OR;
            break;
        case SET_OP_INTERSECT:
            i = A_AND;
            break;
        case SET_OP_DIFFERENCE:
            i = A_XOR;
            break;
        case SET_OP_EXCLUDE:
            i = 0; // special case
            break;
        }
        ser = Xandor_Binary(i, val1, val2);
        Val_Init_Bitset(D_OUT, ser);
        break;

    case REB_TYPESET:
        // Combined in place in the first argument, which is then returned
        switch (flags) {
        case SET_OP_UNIQUE:
            break;
        case SET_OP_UNION:
            VAL_TYPESET(val1) |= VAL_TYPESET(val2);
            break;
        case SET_OP_INTERSECT:
            VAL_TYPESET(val1) &= VAL_TYPESET(val2);
            break;
        case SET_OP_DIFFERENCE:
            VAL_TYPESET(val1) ^= VAL_TYPESET(val2);
            break;
        case SET_OP_EXCLUDE:
            VAL_TYPESET(val1) &= ~VAL_TYPESET(val2);
            break;
        }
        return R_ARG1;

    default:
        raise Error_Invalid_Arg(val1);
    }

    return R_OUT;
}
// // Find_Entry: C // // Try to find the entry in the map. If not found // and val is SET, create the entry and store the key and // val. // // RETURNS: the index to the VALUE or zero if there is none. // static REBCNT Find_Entry(REBSER *series, REBVAL *key, REBVAL *val) { REBSER *hser = series->extra.series; // can be null REBCNT *hashes; REBCNT hash; REBVAL *v; REBCNT n; if (IS_NONE(key)) return 0; // We may not be large enough yet for the hash table to // be worthwhile, so just do a linear search: if (!hser) { if (series->tail < MIN_DICT*2) { v = BLK_HEAD(series); if (ANY_WORD(key)) { for (n = 0; n < series->tail; n += 2, v += 2) { if ( ANY_WORD(v) && SAME_SYM(VAL_WORD_SYM(key), VAL_WORD_SYM(v)) ) { if (val) *++v = *val; return n/2+1; } } } else if (ANY_BINSTR(key)) { for (n = 0; n < series->tail; n += 2, v += 2) { if (VAL_TYPE(key) == VAL_TYPE(v) && 0 == Compare_String_Vals(key, v, (REBOOL)!IS_BINARY(v))) { if (val) *++v = *val; return n/2+1; } } } else if (IS_INTEGER(key)) { for (n = 0; n < series->tail; n += 2, v += 2) { if (IS_INTEGER(v) && VAL_INT64(key) == VAL_INT64(v)) { if (val) *++v = *val; return n/2+1; } } } else if (IS_CHAR(key)) { for (n = 0; n < series->tail; n += 2, v += 2) { if (IS_CHAR(v) && VAL_CHAR(key) == VAL_CHAR(v)) { if (val) *++v = *val; return n/2+1; } } } else fail (Error_Has_Bad_Type(key)); if (!val) return 0; Append_Value(series, key); Append_Value(series, val); // does not copy value, e.g. 
if string return series->tail/2; } // Add hash table: //Print("hash added %d", series->tail); series->extra.series = hser = Make_Hash_Sequence(series->tail); MANAGE_SERIES(hser); Rehash_Hash(series); } // Get hash table, expand it if needed: if (series->tail > hser->tail/2) { Expand_Hash(hser); // modifies size value Rehash_Hash(series); } hash = Find_Key(series, hser, key, 2, 0, 0); hashes = (REBCNT*)hser->data; n = hashes[hash]; // Just a GET of value: if (!val) return n; // Must set the value: if (n) { // re-set it: *BLK_SKIP(series, ((n-1)*2)+1) = *val; // set it return n; } // Create new entry: Append_Value(series, key); Append_Value(series, val); // does not copy value, e.g. if string return (hashes[hash] = series->tail/2); }
*/  static REBCNT Find_Entry(REBSER *series, REBVAL *key, REBVAL *val)
/*
**      Look up the key in a map series whose cells alternate key,
**      value. When val is non-NULL the entry is also written: an
**      existing value cell is overwritten, otherwise the key and
**      value are appended as a new pair.
**
**      While the series has no hash table and holds fewer than
**      MIN_DICT*2 cells it is searched linearly; only word,
**      string/binary, integer and char keys are accepted on that
**      path. Past that size a hash table is created, and it is
**      expanded and rehashed whenever the series tail exceeds half
**      of the table's tail.
**
**      RETURNS: the 1-based number of the key/value pair, or zero
**      if there is none (NONE key, or key absent and no val given).
**
***********************************************************************/
{
    REBSER *hser = series->extra.series; // can be null
    REBCNT *slots;
    REBCNT slot;
    REBVAL *cell;
    REBCNT pos;

    if (IS_NONE(key)) return 0;

    // We may not be large enough yet for the hash table to
    // be worthwhile, so just do a linear search:
    if (!hser && series->tail < MIN_DICT*2) {
        cell = BLK_HEAD(series);

        if (ANY_WORD(key)) {
            for (pos = 0; pos < series->tail; pos += 2, cell += 2) {
                if (ANY_WORD(cell) && SAME_SYM(key, cell)) {
                    if (val) cell[1] = *val; // value sits right after key
                    return pos / 2 + 1;
                }
            }
        }
        else if (ANY_BINSTR(key)) {
            for (pos = 0; pos < series->tail; pos += 2, cell += 2) {
                if (
                    VAL_TYPE(key) == VAL_TYPE(cell)
                    && Compare_String_Vals(
                        key, cell, (REBOOL)!IS_BINARY(cell)
                    ) == 0
                ) {
                    // The value cell is assigned as-is
                    if (val) cell[1] = *val;
                    return pos / 2 + 1;
                }
            }
        }
        else if (IS_INTEGER(key)) {
            for (pos = 0; pos < series->tail; pos += 2, cell += 2) {
                if (IS_INTEGER(cell) && VAL_INT64(key) == VAL_INT64(cell)) {
                    if (val) cell[1] = *val;
                    return pos / 2 + 1;
                }
            }
        }
        else if (IS_CHAR(key)) {
            for (pos = 0; pos < series->tail; pos += 2, cell += 2) {
                if (IS_CHAR(cell) && VAL_CHAR(key) == VAL_CHAR(cell)) {
                    if (val) cell[1] = *val;
                    return pos / 2 + 1;
                }
            }
        }
        else Trap_Type_DEAD_END(key);

        // Not present: a pure lookup reports zero, a store appends
        if (!val) return 0;

        Append_Value(series, key);
        Append_Value(series, val); // no Copy_Series_Value(val) on strings
        return series->tail / 2;
    }

    // Add hash table:
    if (!hser) {
        series->extra.series = hser = Make_Hash_Array(series->tail);
        Rehash_Hash(series);
    }

    // Get hash table, expand it if needed:
    if (series->tail > hser->tail/2) {
        Expand_Hash(hser); // modifies size value
        Rehash_Hash(series);
    }

    slot = Find_Key(series, hser, key, 2, 0, 0);
    slots = (REBCNT*)hser->data;
    pos = slots[slot];

    // Just a GET of value:
    if (!val) return pos;

    // Must set the value:
    if (pos) { // re-set it:
        *BLK_SKIP(series, ((pos-1)*2)+1) = *val; // set it
        return pos;
    }

    // Create new entry:
    Append_Value(series, key);
    Append_Value(series, val); // no Copy_Series_Value(val) on strings

    pos = series->tail / 2;
    slots[slot] = pos;
    return pos;
}