Ejemplo n.º 1
0
  // Returns a new String holding the character that starts at byte
  // position +offset+ of the receiver.
  //
  // Yields nil when the offset is at or beyond the end of the string.
  // If the byte at that position is flagged by the shared kcode table as
  // the start of a multibyte character and the whole character fits
  // inside the string, all of its bytes are returned; otherwise only the
  // single byte at +offset+ is returned. The result takes the receiver's
  // class and is tainted when the receiver is.
  String* String::find_character(STATE, Fixnum* offset) {
    const size_t index = (size_t)offset->to_native();
    if(index >= size()) {
      return (String*)Qnil;
    }

    uint8_t* start = byte_address() + index;
    kcode::table* encoding = state->shared.kcode_table();

    // Assume a single byte; widen only for a complete multibyte character.
    size_t width = 1;
    if(kcode::mbchar_p(encoding, *start)) {
      const size_t mb_width = kcode::mbclen(encoding, *start);
      if(index + mb_width <= size()) {
        width = mb_width;
      }
    }

    String* character =
      String::create(state, reinterpret_cast<const char*>(start), width);
    character->klass(state, class_object(state));

    if(RTEST(tainted_p(state))) {
      character->taint(state);
    }

    return character;
  }
Ejemplo n.º 2
0
  // Copies up to +count_f+ bytes of the receiver, beginning at byte
  // position +start_f+, into a new String of the receiver's class.
  //
  // A negative start is taken relative to the end of the string.
  // Returns nil when the count is negative or the resolved start lies
  // outside [0, num_bytes]. A count that runs past the end is shortened
  // to the bytes that are available. The result is tainted when the
  // receiver is.
  String* String::substring(STATE, Fixnum* start_f, Fixnum* count_f) {
    native_int begin = start_f->to_native();
    native_int length = count_f->to_native();
    const native_int bytes = num_bytes_->to_native();

    if(length < 0) {
      return (String*)Qnil;
    }

    // Resolve an end-relative start, then range-check it.
    if(begin < 0) {
      begin += bytes;
    }
    if(begin < 0 || begin > bytes) {
      return (String*)Qnil;
    }

    // Shorten the request so it stops at the end of the receiver.
    const native_int stop = begin + length;
    if(stop > bytes) {
      length = bytes - begin;
    }
    length = (length < 0) ? 0 : length;

    String* piece = String::create(state, Fixnum::from(length));
    piece->klass(state, class_object(state));

    // NOTE(review): the source address is deliberately read only after
    // the new String has been allocated, matching the original statement
    // order -- presumably allocation may relocate the receiver's data;
    // confirm before reordering.
    uint8_t* from = byte_address() + begin;
    memcpy(piece->byte_address(), from, length);

    if(tainted_p(state) == Qtrue) {
      piece->taint(state);
    }

    return piece;
  }
Ejemplo n.º 3
0
// Produces a new String of the receiver's class containing at most
// +count_f+ bytes taken from byte position +start_f+ onward.
//
// The usable length is the receiver's byte count, capped at the size of
// the backing CharArray. A negative start is measured back from that
// length. Returns nil for a negative count or a start that resolves
// outside [0, length]; an over-long count is trimmed to what remains.
// Taint is propagated from the receiver to the result.
String* String::substring(STATE, Fixnum* start_f, Fixnum* count_f) {
    native_int offset = start_f->to_native();
    native_int span = count_f->to_native();
    native_int limit = num_bytes_->to_native();

    // Never trust the recorded byte count beyond what the backing
    // storage actually holds.
    const native_int capacity = as<CharArray>(data_)->size();
    if(unlikely(limit > capacity)) {
        limit = capacity;
    }

    if(span < 0) {
        return nil<String>();
    }

    // Translate an end-relative offset, then make sure it is in range.
    if(offset < 0) {
        offset += limit;
    }
    if(offset < 0 || offset > limit) {
        return nil<String>();
    }

    // Trim the span so that it ends no later than the usable length.
    const native_int finish = offset + span;
    if(finish > limit) {
        span = limit - offset;
    }
    span = (span < 0) ? 0 : span;

    String* slice = String::create(state, Fixnum::from(span));
    slice->klass(state, class_object(state));

    // NOTE(review): the read pointer is fetched after the allocation
    // above, as in the original ordering -- presumably because the
    // allocation may move the receiver's data; confirm before reordering.
    uint8_t* src = byte_address() + offset;
    memcpy(slice->byte_address(), src, span);

    if(tainted_p(state) == Qtrue) {
        slice->taint(state);
    }

    return slice;
}
Ejemplo n.º 4
0
  // Builds a new String by mapping every byte of the receiver through
  // +tbl+.
  //
  // +tbl+ must have at least 256 fields, each a String; the String at
  // index N replaces input byte N. If it has fewer fields the primitive
  // failure value is returned. When +respect_kcode+ is true the shared
  // kcode table is consulted and any byte it flags as the start of a
  // multibyte character causes that whole character to be copied through
  // untouched; otherwise the null kcode table is used.
  //
  // The work is done in two passes over the input: the first adds up the
  // exact output size (and type-checks the table entries it touches), the
  // second writes straight into the preallocated result. The result is
  // tainted when the receiver is.
  String* String::transform(STATE, Tuple* tbl, Object* respect_kcode) {
    uint8_t* cur = byte_address();
    uint8_t* fin = cur + size();

    if(tbl->num_fields() < 256) {
      return (String*)Primitives::failure();
    }

    Object** tbl_ptr = tbl->field;

    kcode::table* kcode_tbl = 0;
    if(RTEST(respect_kcode)) {
      kcode_tbl = state->shared.kcode_table();
    } else {
      kcode_tbl = kcode::null_table();
    }

    // Pass 1: calculate the final size of the result.
    size_t result_size = 0;

    while(cur < fin) {
      uint8_t byte = *cur;
      if(kcode::mbchar_p(kcode_tbl, byte)) {
        size_t clen = kcode::mbclen(kcode_tbl, byte);

        // A lead byte near the end may announce more bytes than are left;
        // never step (or later copy) beyond the end of the input.
        size_t rem = fin - cur;
        if(clen > rem) clen = rem;

        result_size += clen;
        cur += clen;
        continue;
      } else {
        result_size += as<String>(tbl_ptr[byte])->size();
      }
      cur++;
    }

    cur = byte_address();
    String* result = String::create(state, Fixnum::from(result_size));

    // Pass 2: since the size was precalculated, write directly into result.
    uint8_t* output = result->byte_address();

    while(cur < fin) {
      uint8_t byte = *cur;
      if(kcode::mbchar_p(kcode_tbl, byte)) {
        size_t len = kcode::mbclen(kcode_tbl, byte);

        // Must mirror the clamp applied in pass 1 so that both passes
        // walk the input identically.
        size_t rem = fin - cur;
        if(len > rem) len = rem;

        memcpy(output, cur, len);
        output += len;
        cur += len;
        continue;
      } else {
        // The unchecked cast is fine: pass 1 already ran this same entry
        // through as<String>.
        String* what = force_as<String>(tbl_ptr[byte]);
        uint8_t* what_buf = what->byte_address();
        size_t what_len = what->size();

        // Short replacements are copied by hand; anything else (including
        // an empty replacement) goes through memcpy.
        switch(what_len) {
        case 1:
          *output++ = *what_buf;
          break;
        case 2:
          *output++ = *what_buf++;
          *output++ = *what_buf;
          break;
        case 3:
          *output++ = *what_buf++;
          *output++ = *what_buf++;
          *output++ = *what_buf;
          break;
        default:
          memcpy(output, what_buf, what_len);
          output += what_len;
          break;
        }
      }
      cur++;
    }

    // tainted_p() answers with an object, so its truthiness has to be
    // tested with RTEST; a bare pointer test cannot tell a false object
    // from a true one.
    if(RTEST(tainted_p(state))) result->taint(state);
    return result;
  }
Ejemplo n.º 5
0
// Builds a new String by translating the receiver's bytes through +tbl+.
//
// +tbl+ must have at least 256 fields, indexed by input byte; if it has
// fewer, the primitive failure value is returned. When +respect_kcode+
// is true the shared kcode table is used, otherwise the null kcode table.
//
// At each input position, in order of precedence:
//   * a byte the kcode table flags as a multibyte lead: the whole
//     character is copied unchanged, provided all of its bytes are
//     present;
//   * a table entry that is a String: that String replaces the byte;
//   * any other table entry is taken as a Tuple of alternating
//     key/replacement Strings: the first key matching the input at this
//     position is consumed and its replacement emitted.
// If none of these yields output (truncated multibyte character, no
// matching key) the byte is emitted as a backslash followed by three
// octal digits and one input byte is consumed.
//
// Output is accumulated in a temporary heap buffer that grows on demand
// and is copied into the resulting String at the end. If that buffer
// cannot be allocated or grown, it is released and the primitive failure
// value is returned. The result is tainted when the receiver is.
String* String::transform(STATE, Tuple* tbl, Object* respect_kcode) {
    uint8_t invalid[5];

    if(tbl->num_fields() < 256) {
        return force_as<String>(Primitives::failure());
    }

    Object** tbl_ptr = tbl->field;

    kcode::table* kcode_tbl = 0;
    if(RTEST(respect_kcode)) {
        kcode_tbl = state->shared().kcode_table();
    } else {
        kcode_tbl = kcode::null_table();
    }

    // Pointers to iterate input bytes.
    uint8_t* in_p = byte_address();

    // Never read more bytes than the backing CharArray holds.
    native_int str_size = size();
    native_int data_size = as<CharArray>(data_)->size();
    if(unlikely(str_size > data_size)) {
        str_size = data_size;
    }

    uint8_t* in_end = in_p + str_size;

    // Optimistic estimate that output size will be 1.25 x input. Keep it
    // at least one byte so that a NULL from malloc always means failure
    // rather than a legal zero-sized allocation.
    native_int out_chunk = str_size * 5 / 4;
    if(out_chunk < 1) out_chunk = 1;

    native_int out_size = out_chunk;
    uint8_t* output = (uint8_t*)malloc(out_size);
    if(!output) {
        return force_as<String>(Primitives::failure());
    }

    uint8_t* out_p = output;
    uint8_t* out_end = out_p + out_size;

    // NOTE(review): if as<> reports a type mismatch by throwing, `output`
    // is not released on that path -- confirm and guard if so.
    while(in_p < in_end) {
        native_int len = 0;
        uint8_t byte = *in_p;
        uint8_t* cur_p = 0;
        native_int rem = in_end - in_p;

        if(kcode::mbchar_p(kcode_tbl, byte)) {
            len = kcode::mbclen(kcode_tbl, byte);

            // if the character length is greater than the remaining
            // bytes, we have a malformed character. Handled below.
            if(rem >= len) {
                cur_p = in_p;
                in_p += len;
            }
        } else if(String* str = try_as<String>(tbl_ptr[byte])) {
            cur_p = str->byte_address();
            len = str->size();
            in_p++;
        } else {
            // Entry holds alternating key / replacement Strings.
            Tuple* pairs = as<Tuple>(tbl_ptr[byte]);

            for(native_int i = 0; i < pairs->num_fields(); i += 2) {
                String* key = as<String>(pairs->at(i));

                native_int klen = key->size();
                if(rem < klen) continue;

                if(memcmp(in_p, key->byte_address(), klen) == 0) {
                    String* str = as<String>(pairs->at(i+1));
                    cur_p = str->byte_address();
                    len = str->size();
                    in_p += klen;
                    break;
                }
            }
        }

        // We could not map this byte, so we add it to the output
        // in stringified octal notation (ie \nnn).
        if(!cur_p) {
            snprintf((char*)invalid, 5, "\\%03o", *((char*)in_p) & 0377);
            in_p++;
            cur_p = invalid;
            len = 4;
        }

        // Grow the buffer when the pending bytes do not fit. Compare
        // lengths rather than forming a pointer beyond the allocation.
        if(len > out_end - out_p) {
            native_int pos = out_p - output;
            out_size += (len > out_chunk ? len : out_chunk);

            // realloc leaves the old block intact on failure, so keep the
            // old pointer until the new one is known to be valid.
            uint8_t* grown = (uint8_t*)realloc(output, out_size);
            if(!grown) {
                free(output);
                return force_as<String>(Primitives::failure());
            }

            output = grown;
            out_p = output + pos;
            out_end = output + out_size;
        }

        // Short runs are copied by hand; everything else (including an
        // empty replacement) goes through memcpy.
        switch(len) {
        case 1:
            *out_p++ = *cur_p;
            break;
        case 2:
            *out_p++ = *cur_p++;
            *out_p++ = *cur_p;
            break;
        case 3:
            *out_p++ = *cur_p++;
            *out_p++ = *cur_p++;
            *out_p++ = *cur_p;
            break;
        default:
            memcpy(out_p, cur_p, len);
            out_p += len;
            break;
        }
    }

    String* result = String::create(state,
                                    reinterpret_cast<const char*>(output),
                                    out_p - output);
    free(output);

    // tainted_p() answers with an object, so its truthiness has to be
    // tested with RTEST; a bare pointer test cannot tell a false object
    // from a true one.
    if(RTEST(tainted_p(state))) result->taint(state);
    return result;
}