*/ REBINT PD_File(REBPVS *pvs) /* ***********************************************************************/ { REBSER *ser; REB_MOLD mo = {0}; REBCNT n; REBUNI c; REBSER *arg; if (pvs->setval) return PE_BAD_SET; ser = Copy_Series_Value(pvs->value); n = SERIES_TAIL(ser); if (n > 0) c = GET_ANY_CHAR(ser, n-1); if (n == 0 || c != '/') Append_Byte(ser, '/'); if (ANY_STR(pvs->select)) arg = VAL_SERIES(pvs->select); else { Reset_Mold(&mo); Mold_Value(&mo, pvs->select, 0); arg = mo.series; } c = GET_ANY_CHAR(arg, 0); n = (c == '/' || c == '\\') ? 1 : 0; Append_String(ser, arg, n, arg->tail-n); Set_Series(VAL_TYPE(pvs->value), pvs->store, ser); return PE_USE; }
*/ REBSER *Temp_Bin_Str_Managed(REBVAL *val, REBCNT *index, REBCNT *length) /* ** Determines if UTF8 conversion is needed for a series before it ** is used with a byte-oriented function. ** ** If conversion is needed, a UTF8 series will be created. Otherwise, ** the source series is returned as-is. ** ** Note: This routine should only be used to generate a value used ** for temporary purposes, because it has a "surprising variance" ** regarding its input. If the value's series can be reused, it is-- ** and this depends on an implementation detail of internal encoding ** that the user should not be aware of (they need not know if the ** internal representation of an ASCII string uses 1, 2, or however ** many bytes). But copying vs. non-copying means the resulting ** data might or might not have previous values available to step ** back into from the originating series! ** ** !!! Should performance dictate it, the callsites could be ** adapted to know whether this produced a new series or not, and ** instead of managing a created result they could be responsible ** for freeing it if so. ** ***********************************************************************/ { REBCNT len = (length && *length) ? *length : VAL_LEN(val); REBSER *series; assert(IS_BINARY(val) || ANY_STR(val)); if (len == 0 || IS_BINARY(val) || VAL_STR_IS_ASCII(val)) { // If it's zero length, BINARY!, or an ANY-STRING! whose bytes are // all values less than 128, we reuse the series. series = VAL_SERIES(val); ASSERT_SERIES_MANAGED(series); if (index) *index = VAL_INDEX(val); if (length) *length = len; } else { // UTF-8 conversion is required, and we manage the result. series = Make_UTF8_From_Any_String(val, len, OPT_ENC_CRLF_MAYBE); MANAGE_SERIES(series); if (index) *index = 0; if (length) *length = SERIES_TAIL(series); } return series; }
*/ void Mold_Value(REB_MOLD *mold, REBVAL *value, REBFLG molded) /* ** Mold or form any value to string series tail. ** ***********************************************************************/ { REBYTE buf[60]; REBINT len; REBSER *ser = mold->series; CHECK_STACK(&len); ASSERT2(SERIES_WIDE(mold->series) == sizeof(REBUNI), RP_BAD_SIZE); ASSERT2(ser, RP_NO_BUFFER); // Special handling of string series: { if (ANY_STR(value) && !IS_TAG(value)) { // Forming a string: if (!molded) { Insert_String(ser, -1, VAL_SERIES(value), VAL_INDEX(value), VAL_LEN(value), 0); return; } // Special format for ALL string series when not at head: if (GET_MOPT(mold, MOPT_MOLD_ALL) && VAL_INDEX(value) != 0) { Mold_All_String(value, mold); return; } } switch (VAL_TYPE(value)) { case REB_NONE: Emit(mold, "+N", SYM_NONE); break; case REB_LOGIC: // if (!molded || !VAL_LOGIC_WORDS(value) || !GET_MOPT(mold, MOPT_MOLD_ALL)) Emit(mold, "+N", VAL_LOGIC(value) ? SYM_TRUE : SYM_FALSE); // else // Mold_Logic(mold, value); break; case REB_INTEGER: len = Emit_Integer(buf, VAL_INT64(value)); goto append; case REB_DECIMAL: case REB_PERCENT: len = Emit_Decimal(buf, VAL_DECIMAL(value), IS_PERCENT(value)?DEC_MOLD_PERCENT:0, Punctuation[GET_MOPT(mold, MOPT_COMMA_PT) ? PUNCT_COMMA : PUNCT_DOT], mold->digits); goto append; case REB_MONEY: len = Emit_Money(value, buf, mold->opts); goto append; case REB_CHAR: Mold_Uni_Char(ser, VAL_CHAR(value), (REBOOL)molded, (REBOOL)GET_MOPT(mold, MOPT_MOLD_ALL)); break; case REB_PAIR: len = Emit_Decimal(buf, VAL_PAIR_X(value), DEC_MOLD_MINIMAL, Punctuation[PUNCT_DOT], mold->digits/2); Append_Bytes_Len(ser, buf, len); Append_Byte(ser, 'x'); len = Emit_Decimal(buf, VAL_PAIR_Y(value), DEC_MOLD_MINIMAL, Punctuation[PUNCT_DOT], mold->digits/2); Append_Bytes_Len(ser, buf, len); //Emit(mold, "IxI", VAL_PAIR_X(value), VAL_PAIR_Y(value)); break; case REB_TUPLE: len = Emit_Tuple(value, buf); goto append; case REB_TIME: //len = Emit_Time(value, buf, Punctuation[GET_MOPT(mold, MOPT_COMMA_PT) ? PUNCT_COMMA : PUNCT_DOT]); Emit_Time(mold, value); break; case REB_DATE: Emit_Date(mold, value); break; case REB_STRING: // FORM happens in top section. Mold_String_Series(value, mold); break; case REB_BINARY: if (GET_MOPT(mold, MOPT_MOLD_ALL) && VAL_INDEX(value) != 0) { Mold_All_String(value, mold); return; } Mold_Binary(value, mold); break; case REB_FILE: if (VAL_LEN(value) == 0) { Append_Bytes(ser, "%\"\""); break; } Mold_File(value, mold); break; case REB_EMAIL: case REB_URL: Mold_Url(value, mold); break; case REB_TAG: if (GET_MOPT(mold, MOPT_MOLD_ALL) && VAL_INDEX(value) != 0) { Mold_All_String(value, mold); return; } Mold_Tag(value, mold); break; // Mold_Issue(value, mold); // break; case REB_BITSET: Pre_Mold(value, mold); // #[bitset! or make bitset! Mold_Bitset(value, mold); End_Mold(mold); break; case REB_IMAGE: Pre_Mold(value, mold); if (!GET_MOPT(mold, MOPT_MOLD_ALL)) { Append_Byte(ser, '['); Mold_Image_Data(value, mold); Append_Byte(ser, ']'); End_Mold(mold); } else { REBVAL val = *value; VAL_INDEX(&val) = 0; // mold all of it Mold_Image_Data(&val, mold); Post_Mold(value, mold); } break; case REB_BLOCK: case REB_PAREN: if (!molded) Form_Block_Series(VAL_SERIES(value), VAL_INDEX(value), mold, 0); else Mold_Block(value, mold); break; case REB_PATH: case REB_SET_PATH: case REB_GET_PATH: case REB_LIT_PATH: Mold_Block(value, mold); break; case REB_VECTOR: Mold_Vector(value, mold, molded); break; case REB_DATATYPE: if (!molded) Emit(mold, "N", VAL_DATATYPE(value) + 1); else Emit(mold, "+DN", SYM_DATATYPE_TYPE, VAL_DATATYPE(value) + 1); break; case REB_TYPESET: Mold_Typeset(value, mold, molded); break; case REB_WORD: // This is a high frequency function, so it is optimized. Append_UTF8(ser, Get_Sym_Name(VAL_WORD_SYM(value)), -1); break; case REB_SET_WORD: Emit(mold, "W:", value); break; case REB_GET_WORD: Emit(mold, ":W", value); break; case REB_LIT_WORD: Emit(mold, "\'W", value); break; case REB_REFINEMENT: Emit(mold, "/W", value); break; case REB_ISSUE: Emit(mold, "#W", value); break; case REB_CLOSURE: case REB_FUNCTION: case REB_NATIVE: case REB_ACTION: case REB_COMMAND: Mold_Function(value, mold); break; case REB_OBJECT: case REB_MODULE: case REB_PORT: if (!molded) Form_Object(value, mold); else Mold_Object(value, mold); break; case REB_TASK: Mold_Object(value, mold); //// | (1<<MOPT_NO_NONE)); break; case REB_ERROR: Mold_Error(value, mold, molded); break; case REB_MAP: Mold_Map(value, mold, molded); break; case REB_GOB: { REBSER *blk; Pre_Mold(value, mold); blk = Gob_To_Block(VAL_GOB(value)); Mold_Block_Series(mold, blk, 0, 0); End_Mold(mold); } break; case REB_EVENT: Mold_Event(value, mold); break; case REB_REBCODE: case REB_OP: case REB_FRAME: case REB_HANDLE: case REB_STRUCT: case REB_LIBRARY: case REB_UTYPE: // Value has no printable form, so just print its name. if (!molded) Emit(mold, "?T?", value); else Emit(mold, "+T", value); break; case REB_END: case REB_UNSET: if (molded) Emit(mold, "+T", value); break; default: Crash(RP_DATATYPE+5, VAL_TYPE(value)); } return; append: Append_Bytes_Len(ser, buf, len); }
x*/ void Modify_StringX(REBCNT action, REBVAL *string, REBVAL *arg) /* ** Actions: INSERT, APPEND, CHANGE ** ** string [string!] {Series at point to insert} ** value [any-type!] {The value to insert} ** /part {Limits to a given length or position.} ** length [number! series! pair!] ** /only {Inserts a series as a series.} ** /dup {Duplicates the insert a specified number of times.} ** count [number! pair!] ** ***********************************************************************/ { REBSER *series = VAL_SERIES(string); REBCNT index = VAL_INDEX(string); REBCNT tail = VAL_TAIL(string); REBINT rlen; // length to be removed REBINT ilen = 1; // length to be inserted REBINT cnt = 1; // DUP count REBINT size; REBVAL *val; REBSER *arg_ser = 0; // argument series // Length of target (may modify index): (arg can be anything) rlen = Partial1((action == A_CHANGE) ? string : arg, DS_ARG(AN_LENGTH)); index = VAL_INDEX(string); if (action == A_APPEND || index > tail) index = tail; // If the arg is not a string, then we need to create a string: if (IS_BINARY(string)) { if (IS_INTEGER(arg)) { if (VAL_INT64(arg) > 255 || VAL_INT64(arg) < 0) Trap_Range(arg); arg_ser = Make_Binary(1); Append_Byte(arg_ser, VAL_CHAR(arg)); // check for size!!! } else if (!ANY_BINSTR(arg)) Trap_Arg(arg); } else if (IS_BLOCK(arg)) { // MOVE! REB_MOLD mo = {0}; arg_ser = mo.series = Make_Unicode(VAL_BLK_LEN(arg) * 10); // GC!? for (val = VAL_BLK_DATA(arg); NOT_END(val); val++) Mold_Value(&mo, val, 0); } else if (IS_CHAR(arg)) { // Optimize this case !!! arg_ser = Make_Unicode(1); Append_Byte(arg_ser, VAL_CHAR(arg)); } else if (!ANY_STR(arg) || IS_TAG(arg)) { arg_ser = Copy_Form_Value(arg, 0); } if (arg_ser) Set_String(arg, arg_ser); else arg_ser = VAL_SERIES(arg); // Length of insertion: ilen = (action != A_CHANGE && DS_REF(AN_PART)) ? rlen : VAL_LEN(arg); // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (series == VAL_SERIES(arg)) { arg_ser = Copy_Series_Part(arg_ser, VAL_INDEX(arg), ilen); // GC!? } // Get /DUP count: if (DS_REF(AN_DUP)) { cnt = Int32(DS_ARG(AN_COUNT)); if (cnt <= 0) return; // no changes } // Total to insert: size = cnt * ilen; if (action != A_CHANGE) { // Always expand series for INSERT and APPEND actions: Expand_Series(series, index, size); } else { if (size > rlen) Expand_Series(series, index, size-rlen); else if (size < rlen && DS_REF(AN_PART)) Remove_Series(series, index, rlen-size); else if (size + index > tail) { EXPAND_SERIES_TAIL(series, size - (tail - index)); } } // For dup count: for (; cnt > 0; cnt--) { Insert_String(series, index, arg_ser, VAL_INDEX(arg), ilen, TRUE); index += ilen; } TERM_SERIES(series); VAL_INDEX(string) = (action == A_APPEND) ? 0 : index; }
*/ REBCNT Modify_String(REBCNT action, REBSER *dst_ser, REBCNT dst_idx, const REBVAL *src_val, REBCNT flags, REBINT dst_len, REBINT dups) /* ** action: INSERT, APPEND, CHANGE ** ** dst_ser: target ** dst_idx: position ** src_val: source ** flags: AN_PART ** dst_len: length to remove ** dups: dup count ** ** return: new dst_idx ** ***********************************************************************/ { REBSER *src_ser = 0; REBCNT src_idx = 0; REBCNT src_len; REBCNT tail = SERIES_TAIL(dst_ser); REBINT size; // total to insert REBOOL needs_free; REBINT limit; // For INSERT/PART and APPEND/PART if (action != A_CHANGE && GET_FLAG(flags, AN_PART)) limit = dst_len; // should be non-negative else limit = -1; if (limit == 0 || dups < 0) return (action == A_APPEND) ? 0 : dst_idx; if (action == A_APPEND || dst_idx > tail) dst_idx = tail; // If the src_val is not a string, then we need to create a string: if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series if (IS_INTEGER(src_val)) { src_ser = Make_Series_Codepoint(Int8u(src_val)); needs_free = TRUE; limit = -1; } else if (IS_BLOCK(src_val)) { src_ser = Join_Binary(src_val, limit); // NOTE: it's the shared FORM buffer! needs_free = FALSE; limit = -1; } else if (IS_CHAR(src_val)) { // "UTF-8 was originally specified to allow codepoints with up to // 31 bits (or 6 bytes). But with RFC3629, this was reduced to 4 // bytes max. to be more compatible to UTF-16." So depending on // which RFC you consider "the UTF-8", max size is either 4 or 6. src_ser = Make_Binary(6); src_ser->tail = Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val)); needs_free = TRUE; limit = -1; } else if (ANY_STR(src_val)) { src_len = VAL_LEN(src_val); if (limit >= 0 && src_len > cast(REBCNT, limit)) src_len = limit; src_ser = Make_UTF8_From_Any_String(src_val, src_len, 0); needs_free = TRUE; limit = -1; } else if (!IS_BINARY(src_val)) raise Error_Invalid_Arg(src_val); } else if (IS_CHAR(src_val)) { src_ser = Make_Series_Codepoint(VAL_CHAR(src_val)); needs_free = TRUE; } else if (IS_BLOCK(src_val)) { src_ser = Form_Tight_Block(src_val); needs_free = TRUE; } else if (!ANY_STR(src_val) || IS_TAG(src_val)) { src_ser = Copy_Form_Value(src_val, 0); needs_free = TRUE; } // Use either new src or the one that was passed: if (src_ser) { src_len = SERIES_TAIL(src_ser); } else { src_ser = VAL_SERIES(src_val); src_idx = VAL_INDEX(src_val); src_len = VAL_LEN(src_val); needs_free = FALSE; } if (limit >= 0) src_len = limit; // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (dst_ser == src_ser) { assert(!needs_free); src_ser = Copy_Sequence_At_Len(src_ser, src_idx, src_len); needs_free = TRUE; src_idx = 0; } // Total to insert: size = dups * src_len; if (action != A_CHANGE) { // Always expand dst_ser for INSERT and APPEND actions: Expand_Series(dst_ser, dst_idx, size); } else { if (size > dst_len) Expand_Series(dst_ser, dst_idx, size - dst_len); else if (size < dst_len && GET_FLAG(flags, AN_PART)) Remove_Series(dst_ser, dst_idx, dst_len - size); else if (size + dst_idx > tail) { EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx)); } } // For dup count: for (; dups > 0; dups--) { Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE); dst_idx += src_len; } TERM_SEQUENCE(dst_ser); if (needs_free) { // If we did not use the series that was passed in, but rather // created an internal temporary one, we need to free it. Free_Series(src_ser); } return (action == A_APPEND) ? 0 : dst_idx; }
*/ REBCNT Modify_String(REBCNT action, REBSER *dst_ser, REBCNT dst_idx, const REBVAL *src_val, REBCNT flags, REBINT dst_len, REBINT dups) /* ** action: INSERT, APPEND, CHANGE ** ** dst_ser: target ** dst_idx: position ** src_val: source ** flags: AN_PART ** dst_len: length to remove ** dups: dup count ** ** return: new dst_idx ** ***********************************************************************/ { REBSER *src_ser = 0; REBCNT src_idx = 0; REBCNT src_len; REBCNT tail = SERIES_TAIL(dst_ser); REBINT size; // total to insert if (dups < 0) return (action == A_APPEND) ? 0 : dst_idx; if (action == A_APPEND || dst_idx > tail) dst_idx = tail; // If the src_val is not a string, then we need to create a string: if (GET_FLAG(flags, AN_SERIES)) { // used to indicate a BINARY series if (IS_INTEGER(src_val)) { src_ser = Append_Byte(0, Int8u(src_val)); // creates a binary } else if (IS_BLOCK(src_val)) { src_ser = Join_Binary(src_val); // NOTE: it's the shared FORM buffer! } else if (IS_CHAR(src_val)) { src_ser = Make_Binary(6); // (I hate unicode) src_ser->tail = Encode_UTF8_Char(BIN_HEAD(src_ser), VAL_CHAR(src_val)); } else if (!ANY_BINSTR(src_val)) Trap_Arg_DEAD_END(src_val); } else if (IS_CHAR(src_val)) { src_ser = Append_Byte(0, VAL_CHAR(src_val)); // unicode ok too } else if (IS_BLOCK(src_val)) { src_ser = Form_Tight_Block(src_val); } else if (!ANY_STR(src_val) || IS_TAG(src_val)) { src_ser = Copy_Form_Value(src_val, 0); } // Use either new src or the one that was passed: if (src_ser) { src_len = SERIES_TAIL(src_ser); } else { src_ser = VAL_SERIES(src_val); src_idx = VAL_INDEX(src_val); src_len = VAL_LEN(src_val); } // For INSERT or APPEND with /PART use the dst_len not src_len: if (action != A_CHANGE && GET_FLAG(flags, AN_PART)) src_len = dst_len; // If Source == Destination we need to prevent possible conflicts. // Clone the argument just to be safe. // (Note: It may be possible to optimize special cases like append !!) if (dst_ser == src_ser) { src_ser = Copy_Series_Part(src_ser, src_idx, src_len); src_idx = 0; } // Total to insert: size = dups * src_len; if (action != A_CHANGE) { // Always expand dst_ser for INSERT and APPEND actions: Expand_Series(dst_ser, dst_idx, size); } else { if (size > dst_len) Expand_Series(dst_ser, dst_idx, size - dst_len); else if (size < dst_len && GET_FLAG(flags, AN_PART)) Remove_Series(dst_ser, dst_idx, dst_len - size); else if (size + dst_idx > tail) { EXPAND_SERIES_TAIL(dst_ser, size - (tail - dst_idx)); } } // For dup count: for (; dups > 0; dups--) { Insert_String(dst_ser, dst_idx, src_ser, src_idx, src_len, TRUE); dst_idx += src_len; } TERM_SERIES(dst_ser); return (action == A_APPEND) ? 0 : dst_idx; }
*/ static To_Thru(REBPARSE *parse, REBCNT index, REBVAL *block, REBFLG is_thru) /* ***********************************************************************/ { REBSER *series = parse->series; REBCNT type = parse->type; REBVAL *blk; REBVAL *item; REBCNT cmd; REBCNT i; REBCNT len; for (; index <= series->tail; index++) { for (blk = VAL_BLK(block); NOT_END(blk); blk++) { item = blk; // Deal with words and commands if (IS_WORD(item)) { if (cmd = VAL_CMD(item)) { if (cmd == SYM_END) { if (index >= series->tail) { index = series->tail; goto found; } goto next; } else if (cmd == SYM_QUOTE) { item = ++blk; // next item is the quoted value if (IS_END(item)) goto bad_target; if (IS_PAREN(item)) { item = Do_Block_Value_Throw(item); // might GC } } else goto bad_target; } else { item = Get_Var(item); } } else if (IS_PATH(item)) { item = Get_Parse_Value(item); } // Try to match it: if (type >= REB_BLOCK) { if (ANY_BLOCK(item)) goto bad_target; i = Parse_Next_Block(parse, index, item, 0); if (i != NOT_FOUND) { if (!is_thru) i--; index = i; goto found; } } else if (type == REB_BINARY) { REBYTE ch1 = *BIN_SKIP(series, index); // Handle special string types: if (IS_CHAR(item)) { if (VAL_CHAR(item) > 0xff) goto bad_target; if (ch1 == VAL_CHAR(item)) goto found1; } else if (IS_BINARY(item)) { if (ch1 == *VAL_BIN_DATA(item)) { len = VAL_LEN(item); if (len == 1) goto found1; if (0 == Compare_Bytes(BIN_SKIP(series, index), VAL_BIN_DATA(item), len, 0)) { if (is_thru) index += len; goto found; } } } else if (IS_INTEGER(item)) { if (VAL_INT64(item) > 0xff) goto bad_target; if (ch1 == VAL_INT32(item)) goto found1; } else goto bad_target; } else { // String REBCNT ch1 = GET_ANY_CHAR(series, index); REBCNT ch2; if (!HAS_CASE(parse)) ch1 = UP_CASE(ch1); // Handle special string types: if (IS_CHAR(item)) { ch2 = VAL_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) goto found1; } else if (ANY_STR(item)) { ch2 = VAL_ANY_CHAR(item); if (!HAS_CASE(parse)) ch2 = UP_CASE(ch2); if (ch1 == ch2) { len = VAL_LEN(item); if (len == 1) goto found1; i = Find_Str_Str(series, 0, index, SERIES_TAIL(series), 1, VAL_SERIES(item), VAL_INDEX(item), len, AM_FIND_MATCH | parse->flags); if (i != NOT_FOUND) { if (is_thru) i += len; index = i; goto found; } } } else if (IS_INTEGER(item)) { ch1 = GET_ANY_CHAR(series, index); // No casing! if (ch1 == (REBCNT)VAL_INT32(item)) goto found1; } else goto bad_target; } next: // Check for | (required if not end) blk++; if (IS_PAREN(blk)) blk++; if (IS_END(blk)) break; if (!IS_OR_BAR(blk)) { item = blk; goto bad_target; } } } return NOT_FOUND; found: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index; found1: if (IS_PAREN(blk+1)) Do_Block_Value_Throw(blk+1); return index + (is_thru ? 1 : 0); bad_target: Trap1(RE_PARSE_RULE, item); return 0; }