// Returns a new String holding the character that begins at byte position
// +offset+ of the receiver, or nil when that position is at or beyond the
// end of the string. The offset is converted to an unsigned value first, so
// a negative offset becomes a very large number and is rejected by the same
// bounds check.
//
// When the byte at +offset+ is flagged as a multibyte lead byte by the
// shared kcode table and the whole character fits inside the string, the
// full character is returned; in every other case a single byte is
// returned. The result is given the receiver's class and inherits its
// taint.
String* String::find_character(STATE, Fixnum* offset) {
  const size_t index = (size_t)offset->to_native();
  if(index >= size()) {
    return (String*)Qnil;
  }

  uint8_t* const first = byte_address() + index;
  kcode::table* const table = state->shared.kcode_table();

  String* character = 0;

  // Try the multibyte interpretation first; it is only usable when the
  // declared character length does not run past the end of the string.
  if(kcode::mbchar_p(table, *first)) {
    const size_t width = kcode::mbclen(table, *first);
    if(index + width <= size()) {
      character = String::create(state,
                                 reinterpret_cast<const char*>(first),
                                 width);
    }
  }

  // Single-byte character, or a multibyte one that was cut short.
  if(!character) {
    character = String::create(state,
                               reinterpret_cast<const char*>(first),
                               1);
  }

  character->klass(state, class_object(state));

  if(RTEST(tainted_p(state))) {
    character->taint(state);
  }

  return character;
}
// Copies +count_f+ bytes of the receiver, beginning at byte +start_f+, into
// a new String of the receiver's class.
//
// - A negative count yields nil.
// - A negative start is taken relative to the end of the string; if it is
//   still negative after that adjustment, or if the start lies beyond the
//   end, the result is nil.
// - A count that would run past the end is shortened to the bytes that are
//   actually available (possibly zero).
//
// The new string is tainted when the receiver is tainted.
String* String::substring(STATE, Fixnum* start_f, Fixnum* count_f) {
  native_int offset = start_f->to_native();
  native_int length = count_f->to_native();
  const native_int limit = num_bytes_->to_native();

  if(length < 0) {
    return (String*)Qnil;
  }

  // Resolve an end-relative start position.
  if(offset < 0) {
    offset += limit;
  }

  if(offset < 0 || offset > limit) {
    return (String*)Qnil;
  }

  // Shorten the request to what the string can supply.
  if(offset + length > limit) {
    length = limit - offset;
  }

  if(length < 0) {
    length = 0;
  }

  String* piece = String::create(state, Fixnum::from(length));
  piece->klass(state, class_object(state));

  uint8_t* source = byte_address() + offset;
  memcpy(piece->byte_address(), source, length);

  if(tainted_p(state) == Qtrue) {
    piece->taint(state);
  }

  return piece;
}
// Copies +count_f+ bytes of the receiver, beginning at byte +start_f+, into
// a new String of the receiver's class.
//
// The recorded byte count of the receiver is first limited to the size of
// its backing CharArray, so the copy below never reads more than the
// storage holds. After that:
//
// - A negative count yields nil.
// - A negative start is taken relative to the end of the string; if it is
//   still negative after that adjustment, or if the start lies beyond the
//   end, the result is nil.
// - A count that would run past the end is shortened to the bytes that are
//   actually available (possibly zero).
//
// The new string is tainted when the receiver is tainted.
String* String::substring(STATE, Fixnum* start_f, Fixnum* count_f) {
  native_int offset = start_f->to_native();
  native_int length = count_f->to_native();
  native_int limit = num_bytes_->to_native();
  const native_int capacity = as<CharArray>(data_)->size();

  // The string may claim more bytes than its storage really has.
  if(unlikely(limit > capacity)) {
    limit = capacity;
  }

  if(length < 0) {
    return nil<String>();
  }

  // Resolve an end-relative start position.
  if(offset < 0) {
    offset += limit;
  }

  if(offset < 0 || offset > limit) {
    return nil<String>();
  }

  // Shorten the request to what the string can supply.
  if(offset + length > limit) {
    length = limit - offset;
  }

  if(length < 0) {
    length = 0;
  }

  String* piece = String::create(state, Fixnum::from(length));
  piece->klass(state, class_object(state));

  uint8_t* source = byte_address() + offset;
  memcpy(piece->byte_address(), source, length);

  if(tainted_p(state) == Qtrue) {
    piece->taint(state);
  }

  return piece;
}
// Builds a new String by replacing every single byte of the receiver with
// the String stored at that byte's index in +tbl+.
//
// tbl           - Tuple with at least 256 fields; the field for each byte
//                 that occurs as a single-byte character must be a String
//                 (checked with as<String> in the sizing pass).
// respect_kcode - when truthy, multibyte characters (as classified by the
//                 shared kcode table) are copied through unchanged;
//                 otherwise the null table is used for classification.
//
// Returns Primitives::failure() when +tbl+ has fewer than 256 fields.
// The result is tainted when the receiver is tainted.
String* String::transform(STATE, Tuple* tbl, Object* respect_kcode) {
  if(tbl->num_fields() < 256) {
    return (String*)Primitives::failure();
  }

  uint8_t* const begin = byte_address();
  uint8_t* const fin = begin + size();

  Object** tbl_ptr = tbl->field;

  kcode::table* kcode_tbl = 0;
  if(RTEST(respect_kcode)) {
    kcode_tbl = state->shared.kcode_table();
  } else {
    kcode_tbl = kcode::null_table();
  }

  // Pass 1: calculate the final size of the result.
  size_t result_size = 0;
  uint8_t* cur = begin;

  while(cur < fin) {
    uint8_t byte = *cur;

    if(kcode::mbchar_p(kcode_tbl, byte)) {
      size_t clen = kcode::mbclen(kcode_tbl, byte);

      // A lead byte near the end may announce more bytes than remain;
      // never count (or later copy) past the end of the input.
      size_t remaining = static_cast<size_t>(fin - cur);
      if(clen > remaining) clen = remaining;

      result_size += clen;
      cur += clen;
    } else {
      result_size += as<String>(tbl_ptr[byte])->size();
      cur++;
    }
  }

  String* result = String::create(state, Fixnum::from(result_size));

  // Pass 2: since the size was precalculated, write directly into result.
  uint8_t* output = result->byte_address();
  cur = begin;

  while(cur < fin) {
    uint8_t byte = *cur;

    if(kcode::mbchar_p(kcode_tbl, byte)) {
      size_t len = kcode::mbclen(kcode_tbl, byte);

      // Same clamp as in pass 1 so both passes agree on the byte count.
      size_t remaining = static_cast<size_t>(fin - cur);
      if(len > remaining) len = remaining;

      memcpy(output, cur, len);
      output += len;
      cur += len;
    } else {
      // Not unsafe: pass 1 already type checked this table entry.
      String* what = force_as<String>(tbl_ptr[byte]);
      uint8_t* what_buf = what->byte_address();

      // Short replacements are copied byte by byte; longer ones via memcpy.
      switch(what->size()) {
      case 1:
        *output++ = *what_buf;
        break;
      case 2:
        *output++ = *what_buf++;
        *output++ = *what_buf;
        break;
      case 3:
        *output++ = *what_buf++;
        *output++ = *what_buf++;
        *output++ = *what_buf;
        break;
      default:
        memcpy(output, what_buf, what->size());
        output += what->size();
        break;
      }

      cur++;
    }
  }

  // Test the Ruby truthiness of tainted_p(), as find_character and
  // substring do; testing the raw pointer would not tell true from false
  // if the false object is a non-null value.
  if(RTEST(tainted_p(state))) result->taint(state);

  return result;
}
// Builds a new String by mapping the receiver's bytes through +tbl+.
//
// tbl           - Tuple with at least 256 fields, indexed by byte value.
//                 A field is either a String (the replacement for that
//                 byte) or a Tuple of alternating key/replacement Strings;
//                 the first key that matches the input at the current
//                 position is consumed and its replacement is emitted.
// respect_kcode - when truthy, multibyte characters (as classified by the
//                 shared kcode table) are copied through unchanged;
//                 otherwise the null table is used for classification.
//
// A byte that cannot be mapped (a multibyte character cut short by the end
// of the string, or no matching key in a nested Tuple) is emitted as a
// backslash followed by three octal digits.
//
// Returns Primitives::failure() when +tbl+ has fewer than 256 fields or
// when the scratch buffer cannot be allocated. The result is tainted when
// the receiver is tainted.
String* String::transform(STATE, Tuple* tbl, Object* respect_kcode) {
  // Owns the malloc'd scratch buffer so that it is released on every exit
  // path, including any non-local exit taken by the checked casts below.
  struct ScratchBuffer {
    uint8_t* bytes;
    ScratchBuffer() : bytes(0) {}
    ~ScratchBuffer() { free(bytes); }
  };

  uint8_t invalid[5];

  if(tbl->num_fields() < 256) {
    return force_as<String>(Primitives::failure());
  }

  Object** tbl_ptr = tbl->field;

  kcode::table* kcode_tbl = 0;
  if(RTEST(respect_kcode)) {
    kcode_tbl = state->shared().kcode_table();
  } else {
    kcode_tbl = kcode::null_table();
  }

  // Pointers to iterate input bytes. The string size is limited to the
  // size of the backing CharArray so the loop never reads past it.
  uint8_t* in_p = byte_address();

  native_int str_size = size();
  native_int data_size = as<CharArray>(data_)->size();
  if(unlikely(str_size > data_size)) {
    str_size = data_size;
  }

  uint8_t* in_end = in_p + str_size;

  // Optimistic estimate that output size will be 1.25 x input.
  native_int out_chunk = str_size * 5 / 4;
  native_int out_size = out_chunk;

  // Always request at least one byte: a zero-size malloc may legitimately
  // return NULL, which must not be mistaken for an allocation failure.
  ScratchBuffer buffer;
  buffer.bytes = (uint8_t*)malloc(out_size > 0 ? out_size : 1);
  if(!buffer.bytes) {
    return force_as<String>(Primitives::failure());
  }

  uint8_t* out_p = buffer.bytes;
  uint8_t* out_end = out_p + out_size;

  while(in_p < in_end) {
    native_int len = 0;
    uint8_t byte = *in_p;
    uint8_t* cur_p = 0;

    if(kcode::mbchar_p(kcode_tbl, byte)) {
      len = kcode::mbclen(kcode_tbl, byte);
      native_int rem = in_end - in_p;

      // If the character length is greater than the remaining bytes, we
      // have a malformed character. Handled below.
      if(rem >= len) {
        cur_p = in_p;
        in_p += len;
      }
    } else if(String* replacement = try_as<String>(tbl_ptr[byte])) {
      cur_p = replacement->byte_address();
      len = replacement->size();
      in_p++;
    } else {
      Tuple* pairs = as<Tuple>(tbl_ptr[byte]);
      native_int rem = in_end - in_p;

      for(native_int i = 0; i < pairs->num_fields(); i += 2) {
        String* key = as<String>(pairs->at(i));
        native_int klen = key->size();

        // An empty key would match without consuming any input and the
        // loop would never advance, so it is skipped like a key that is
        // longer than the remaining input.
        if(klen < 1 || rem < klen) continue;

        if(memcmp(in_p, key->byte_address(), klen) == 0) {
          String* mapped = as<String>(pairs->at(i+1));
          cur_p = mapped->byte_address();
          len = mapped->size();
          in_p += klen;
          break;
        }
      }
    }

    // We could not map this byte, so we add it to the output in
    // stringified octal notation (a backslash and three octal digits).
    if(!cur_p) {
      snprintf((char*)invalid, 5, "\\%03o", *((char*)in_p) & 0377);
      in_p++;
      cur_p = invalid;
      len = 4;
    }

    // Grow the scratch buffer by at least one chunk when the pending
    // bytes do not fit.
    if(out_p + len > out_end) {
      native_int pos = out_p - buffer.bytes;
      out_size += (len > out_chunk ? len : out_chunk);

      // Keep the old pointer until realloc succeeds; on failure the old
      // block is still allocated and is released by the buffer guard.
      uint8_t* grown = (uint8_t*)realloc(buffer.bytes, out_size);
      if(!grown) {
        return force_as<String>(Primitives::failure());
      }

      buffer.bytes = grown;
      out_p = buffer.bytes + pos;
      out_end = buffer.bytes + out_size;
    }

    // Short replacements are copied byte by byte; longer ones via memcpy.
    switch(len) {
    case 1:
      *out_p++ = *cur_p;
      break;
    case 2:
      *out_p++ = *cur_p++;
      *out_p++ = *cur_p;
      break;
    case 3:
      *out_p++ = *cur_p++;
      *out_p++ = *cur_p++;
      *out_p++ = *cur_p;
      break;
    default:
      memcpy(out_p, cur_p, len);
      out_p += len;
      break;
    }
  }

  String* result = String::create(state,
                                  reinterpret_cast<const char*>(buffer.bytes),
                                  out_p - buffer.bytes);

  // Test the Ruby truthiness of tainted_p(), as find_character and
  // substring do; testing the raw pointer would not tell true from false
  // if the false object is a non-null value.
  if(RTEST(tainted_p(state))) result->taint(state);

  return result;
}