*/ void Insert_String(REBSER *dst, REBCNT idx, const REBSER *src, REBCNT pos, REBCNT len, REBFLG no_expand) /* ** Insert a non-encoded string into a series at given index. ** Source and/or destination can be 1 or 2 bytes wide. ** If destination is not wide enough, it will be widened. ** ***********************************************************************/ { REBUNI *up; REBYTE *bp; REBCNT n; if (idx > dst->tail) idx = dst->tail; if (!no_expand) Expand_Series(dst, idx, len); // tail changed too // Src and dst have same width (8 or 16): if (SERIES_WIDE(dst) == SERIES_WIDE(src)) { cp_same: if (BYTE_SIZE(dst)) memcpy(BIN_SKIP(dst, idx), BIN_SKIP(src, pos), len); else memcpy(UNI_SKIP(dst, idx), UNI_SKIP(src, pos), sizeof(REBUNI) * len); return; } // Src is 8 and dst is 16: if (!BYTE_SIZE(dst)) { bp = BIN_SKIP(src, pos); up = UNI_SKIP(dst, idx); for (n = 0; n < len; n++) up[n] = (REBUNI)bp[n]; return; } // Src is 16 and dst is 8: bp = BIN_SKIP(dst, idx); up = UNI_SKIP(src, pos); for (n = 0; n < len; n++) { if (up[n] > 0xFF) { //Debug_Num("##Widen-series because char value is:", up[n]); // Expand dst and restart: idx += n; pos += n; len -= n; Widen_String(dst, TRUE); goto cp_same; } bp[n] = (REBYTE)up[n]; } }
*/ RL_API int RL_Get_String(REBSER *series, u32 index, void **str) /* ** Obtain a pointer into a string (bytes or unicode). ** ** Returns: ** The length and type of string. When len > 0, string is unicode. ** When len < 0, string is bytes. ** Arguments: ** series - string series pointer ** index - index from beginning (zero-based) ** str - pointer to first character ** Notes: ** If the len is less than zero, then the string is optimized to ** codepoints (chars) 255 or less for ASCII and LATIN-1 charsets. ** Strings are allowed to move in memory. Therefore, you will want ** to make a copy of the string if needed. ** ***********************************************************************/ { // ret: len or -len int len = (index >= series->tail) ? 0 : series->tail - index; if (BYTE_SIZE(series)) { *str = BIN_SKIP(series, index); len = -len; } else { *str = UNI_SKIP(series, index); } return len; }
*/ void Enline_Uni(REBSER *ser, REBCNT idx, REBCNT len) /* ***********************************************************************/ { REBCNT cnt = 0; REBUNI *bp; REBUNI c = 0; REBCNT tail; // Calculate the size difference by counting the number of LF's // that have no CR's in front of them. bp = UNI_SKIP(ser, idx); for (; len > 0; len--) { if (*bp == LF && c != CR) cnt++; c = *bp++; } if (cnt == 0) return; // Extend series: len = SERIES_TAIL(ser); // before expansion EXPAND_SERIES_TAIL(ser, cnt); tail = SERIES_TAIL(ser); // after expansion bp = UNI_HEAD(ser); // expand may change it // Add missing CRs: while (cnt > 0) { bp[tail--] = bp[len]; // Copy src to dst. if (bp[len] == LF && (len == 0 || bp[len - 1] != CR)) { bp[tail--] = CR; cnt--; } len--; } }
*/ REBSER *Copy_String(REBSER *src, REBCNT index, REBINT length) /* ** Copies a portion of any string (byte or unicode). ** Will slim the string, if needed. ** ** The index + length must be in range unsigned int 32. ** ***********************************************************************/ { REBUNI *up; REBYTE wide = 1; REBSER *dst; REBINT n; if (length < 0) length = src->tail; // Can it be slimmed down? if (!BYTE_SIZE(src)) { up = UNI_SKIP(src, index); for (n = 0; n < length; n++) if (up[n] > 0xff) break; if (n < length) wide = sizeof(REBUNI); } dst = Make_Series(length + 1, wide, MKS_NONE); Insert_String(dst, 0, src, index, length, TRUE); SERIES_TAIL(dst) = length; TERM_SEQUENCE(dst); return dst; }
*/ REBUNI *Prep_Uni_Series(REB_MOLD *mold, REBCNT len) /* ***********************************************************************/ { REBCNT tail = SERIES_TAIL(mold->series); EXPAND_SERIES_TAIL(mold->series, len); return UNI_SKIP(mold->series, tail); }
//
// Append the character `chr` to the unicode series `dst`.
//
// When `molded` is false the character is appended as-is. Otherwise it
// is written in source form: `#"`, the text produced by Emit_Uni_Char()
// (which receives `parened`), and a closing `"`. The series is
// terminated in both cases.
//
STOID Mold_Uni_Char(REBSER *dst, REBUNI chr, REBOOL molded, REBOOL parened)
{
    const REBCNT start = SERIES_TAIL(dst); // tail before expansion
    REBUNI *out;

    if (molded) {
        EXPAND_SERIES_TAIL(dst, 10); // worst case: #"^(1234)"
        out = UNI_SKIP(dst, start);
        out[0] = '#';
        out[1] = '"';
        out = Emit_Uni_Char(out + 2, chr, parened);
        *out++ = '"';

        // Fewer than 10 units may have been used; set the real tail.
        dst->tail = out - UNI_HEAD(dst);
    }
    else {
        EXPAND_SERIES_TAIL(dst, 1);
        *UNI_SKIP(dst, start) = chr;
    }

    UNI_TERM(dst);
}
*/ void Trim_Tail(REBSER *src, REBYTE chr) /* ** Used to trim off hanging spaces during FORM and MOLD. ** ***********************************************************************/ { REBOOL is_uni = !BYTE_SIZE(src); REBCNT tail; REBUNI c; assert(!Is_Array_Series(src)); for (tail = SERIES_TAIL(src); tail > 0; tail--) { c = is_uni ? *UNI_SKIP(src, tail - 1) : *BIN_SKIP(src, tail - 1); if (c != chr) break; } SERIES_TAIL(src) = tail; TERM_SEQUENCE(src); }
*/ REBYTE *Scan_Item(REBYTE *src, REBYTE *end, REBUNI term, REBYTE *invalid) /* ** Scan as UTF8 an item like a file or URL. ** ** Returns continuation point or zero for error. ** ** Put result into the MOLD_BUF as uni-chars. ** ***********************************************************************/ { REBUNI c; REBSER *buf; buf = BUF_MOLD; RESET_TAIL(buf); while (src < end && *src != term) { c = *src; // End of stream? if (c == 0) break; // If no term, then any white will terminate: if (!term && IS_WHITE(c)) break; // Ctrl chars are invalid: if (c < ' ') return 0; // invalid char if (c == '\\') c = '/'; // Accept %xx encoded char: else if (c == '%') { if (!Scan_Hex2(src+1, &c, FALSE)) return 0; src += 2; } // Accept ^X encoded char: else if (c == '^') { if (src+1 == end) return 0; // nothing follows ^ c = Scan_Char(&src); if (!term && IS_WHITE(c)) break; src--; } // Accept UTF8 encoded char: else if (c >= 0x80) { c = Decode_UTF8_Char(&src, 0); // zero on error if (c == 0) return 0; } // Is char as literal valid? (e.g. () [] etc.) else if (invalid && strchr(invalid, c)) return 0; src++; *UNI_SKIP(buf, buf->tail) = c; // not affected by Extend_Series if (++(buf->tail) >= SERIES_REST(buf)) Extend_Series(buf, 1); } if (*src && *src == term) src++; UNI_TERM(buf); return src; }
*/ REBYTE *Scan_Quote(REBYTE *src, SCAN_STATE *scan_state) /* ** Scan a quoted string, handling all the escape characters. ** ** The result will be put into the temporary MOLD_BUF unistring. ** ***********************************************************************/ { REBINT nest = 0; REBUNI term; REBINT chr; REBCNT lines = 0; REBSER *buf = BUF_MOLD; RESET_TAIL(buf); term = (*src++ == '{') ? '}' : '"'; // pick termination while (*src != term || nest > 0) { chr = *src; switch (chr) { case 0: return 0; // Scan_state shows error location. case '^': chr = Scan_Char(&src); if (chr == -1) return 0; src--; break; case '{': if (term != '"') nest++; break; case '}': if (term != '"' && nest > 0) nest--; break; case CR: if (src[1] == LF) src++; // fall thru case LF: if (term == '"') return 0; lines++; chr = LF; break; default: if (chr >= 0x80) { chr = Decode_UTF8_Char(&src, 0); // zero on error if (chr == 0) return 0; } } src++; *UNI_SKIP(buf, buf->tail) = chr; if (++(buf->tail) >= SERIES_REST(buf)) Extend_Series(buf, 1); } src++; // Skip ending quote or brace. if (scan_state) scan_state->line_count += lines; UNI_TERM(buf); return src; }