Beispiel #1
0
static void write_utf8(bson_buffer_t buffer, VALUE string, char check_null) {
    result_t status = check_string((unsigned char*)RSTRING_PTR(string), RSTRING_LEN(string),
                                   1, check_null);
    if (status == HAS_NULL) {
        bson_buffer_free(buffer);
        rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
    } else if (status == NOT_UTF_8) {
        bson_buffer_free(buffer);
        rb_raise(InvalidStringEncoding, "String not valid UTF-8");
    }
    SAFE_WRITE(buffer, RSTRING_PTR(string), (int)RSTRING_LEN(string));
}
Beispiel #2
0
static void write_utf8(bson_buffer_t buffer, VALUE string, int allow_null) {
    result_t status = validate_utf8_encoding(
        (const char*)RSTRING_PTR(string), RSTRING_LEN(string), allow_null);

    if (status == HAS_NULL) {
        bson_buffer_free(buffer);
        rb_raise(InvalidDocument, "Key names / regex patterns must not contain the NULL byte");
    } else if (status == INVALID_UTF8) {
        bson_buffer_free(buffer);
        rb_raise(InvalidStringEncoding, "String not valid UTF-8");
    }
    SAFE_WRITE(buffer, RSTRING_PTR(string), (int)RSTRING_LEN(string));
}
Beispiel #3
0
static VALUE method_serialize(VALUE self, VALUE doc, VALUE check_keys, VALUE move_id) {
    VALUE result;
    buffer_t buffer = buffer_new();
    if (buffer == NULL) {
        rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
    }

    write_doc(buffer, doc, check_keys, move_id);

    result = rb_str_new(buffer_get_buffer(buffer), buffer_get_position(buffer));
    if (bson_buffer_free(buffer) != 0) {
        rb_raise(rb_eRuntimeError, "failed to free buffer");
    }
    return result;
}
Beispiel #4
0
static void write_doc(buffer_t buffer, VALUE hash, VALUE check_keys, VALUE move_id) {
    buffer_position start_position = buffer_get_position(buffer);
    buffer_position length_location = buffer_save_space(buffer, 4);
    buffer_position length;
    int allow_id;
    int (*write_function)(VALUE, VALUE, VALUE) = NULL;
    VALUE id_str = rb_str_new2("_id");
    VALUE id_sym = ID2SYM(rb_intern("_id"));

    if (length_location == -1) {
        rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
    }

    // write '_id' first if move_id is true. then don't allow an id to be written.
    if(move_id == Qtrue) {
        allow_id = 0;
        if (rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) {
            VALUE id = rb_hash_aref(hash, id_str);
            write_element_with_id(id_str, id, pack_extra(buffer, check_keys));
        } else if (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue) {
            VALUE id = rb_hash_aref(hash, id_sym);
            write_element_with_id(id_sym, id, pack_extra(buffer, check_keys));
        }
    }
    else {
        allow_id = 1;
        // Ensure that hash doesn't contain both '_id' and :_id
        if ((rb_obj_classname(hash), "Hash") == 0) {
            if ((rb_funcall(hash, rb_intern("has_key?"), 1, id_str) == Qtrue) &&
                   (rb_funcall(hash, rb_intern("has_key?"), 1, id_sym) == Qtrue)) {
                      VALUE oid_sym = rb_hash_delete(hash, id_sym);
                      rb_funcall(hash, rb_intern("[]="), 2, id_str, oid_sym);
            }
        }
    }

    if(allow_id == 1) {
        write_function = write_element_with_id;
    }
    else {
        write_function = write_element_without_id;
    }

    // we have to check for an OrderedHash and handle that specially
    if (strcmp(rb_obj_classname(hash), "BSON::OrderedHash") == 0) {
        VALUE keys = rb_funcall(hash, rb_intern("keys"), 0);
        int i;
                for(i = 0; i < RARRAY_LEN(keys); i++) {
            VALUE key = rb_ary_entry(keys, i);
            VALUE value = rb_hash_aref(hash, key);

            write_function(key, value, pack_extra(buffer, check_keys));
        }
    } else if (rb_obj_is_kind_of(hash, RB_HASH) == Qtrue) {
        rb_hash_foreach(hash, write_function, pack_extra(buffer, check_keys));
    } else {
        bson_buffer_free(buffer);
        rb_raise(InvalidDocument, "BSON.serialize takes a Hash but got a %s", rb_obj_classname(hash));
    }

    // write null byte and fill in length
    SAFE_WRITE(buffer, &zero, 1);
    length = buffer_get_position(buffer) - start_position;

    // make sure that length doesn't exceed 4MB
    if (length > max_bson_size) {
      bson_buffer_free(buffer);
      rb_raise(InvalidDocument, "Document too large: BSON documents are limited to %d bytes.", max_bson_size);
      return;
    }
    SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&length, 4);
}
Beispiel #5
0
static int write_element(VALUE key, VALUE value, VALUE extra, int allow_id) {
    buffer_t buffer = (buffer_t)NUM2LL(rb_ary_entry(extra, 0));
    VALUE check_keys = rb_ary_entry(extra, 1);

    if (TYPE(key) == T_SYMBOL) {
        // TODO better way to do this... ?
        key = rb_str_new2(rb_id2name(SYM2ID(key)));
    }

    if (TYPE(key) != T_STRING) {
        bson_buffer_free(buffer);
        rb_raise(rb_eTypeError, "keys must be strings or symbols");
    }

    if (allow_id == 0 && strcmp("_id", RSTRING_PTR(key)) == 0) {
        return ST_CONTINUE;
    }

    if (check_keys == Qtrue) {
        int i;
        if (RSTRING_LEN(key) > 0 && RSTRING_PTR(key)[0] == '$') {
            bson_buffer_free(buffer);
            rb_raise(InvalidKeyName, "%s - key must not start with '$'", RSTRING_PTR(key));
        }
        for (i = 0; i < RSTRING_LEN(key); i++) {
            if (RSTRING_PTR(key)[i] == '.') {
                bson_buffer_free(buffer);
                rb_raise(InvalidKeyName, "%s - key must not contain '.'", RSTRING_PTR(key));
            }
        }
    }

    switch(TYPE(value)) {
    case T_BIGNUM:
        {
            if (rb_funcall(value, gt_operator, 1, LL2NUM(9223372036854775807LL)) == Qtrue ||
                rb_funcall(value, lt_operator, 1, LL2NUM(-9223372036854775808ULL)) == Qtrue) {
                bson_buffer_free(buffer);
                rb_raise(rb_eRangeError, "MongoDB can only handle 8-byte ints");
            }
        }
        // NOTE: falls through to T_FIXNUM code
    case T_FIXNUM:
        {
            long long ll_value;
            ll_value = NUM2LL(value);

            if (ll_value > 2147483647LL ||
                ll_value < -2147483648LL) {
                write_name_and_type(buffer, key, 0x12);
                SAFE_WRITE(buffer, (char*)&ll_value, 8);
            } else {
                int int_value;
                write_name_and_type(buffer, key, 0x10);
                int_value = (int)ll_value;
                SAFE_WRITE(buffer, (char*)&int_value, 4);
            }
            break;
        }
    case T_TRUE:
        {
            write_name_and_type(buffer, key, 0x08);
            SAFE_WRITE(buffer, &one, 1);
            break;
        }
    case T_FALSE:
        {
            write_name_and_type(buffer, key, 0x08);
            SAFE_WRITE(buffer, &zero, 1);
            break;
        }
    case T_FLOAT:
        {
            double d = NUM2DBL(value);
            write_name_and_type(buffer, key, 0x01);
            SAFE_WRITE(buffer, (char*)&d, 8);
            break;
        }
    case T_NIL:
        {
            write_name_and_type(buffer, key, 0x0A);
            break;
        }
    case T_HASH:
        {
            write_name_and_type(buffer, key, 0x03);
            write_doc(buffer, value, check_keys, Qfalse);
            break;
        }
    case T_ARRAY:
        {
            buffer_position length_location, start_position, obj_length;
            int items, i;
            VALUE* values;

            write_name_and_type(buffer, key, 0x04);
            start_position = buffer_get_position(buffer);

            // save space for length
            length_location = buffer_save_space(buffer, 4);
            if (length_location == -1) {
                rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
            }

            items = RARRAY_LENINT(value);
            for(i = 0; i < items; i++) {
                char* name;
                VALUE key;
                INT2STRING(&name, i);
                key = rb_str_new2(name);
                write_element_with_id(key, rb_ary_entry(value, i), pack_extra(buffer, check_keys));
                FREE_INTSTRING(name);
            }

            // write null byte and fill in length
            SAFE_WRITE(buffer, &zero, 1);
            obj_length = buffer_get_position(buffer) - start_position;
            SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
            break;
        }
    case T_STRING:
        {
            int length;
            write_name_and_type(buffer, key, 0x02);
            length = RSTRING_LENINT(value) + 1;
            SAFE_WRITE(buffer, (char*)&length, 4);
            write_utf8(buffer, value, 0);
            SAFE_WRITE(buffer, &zero, 1);
            break;
        }
    case T_SYMBOL:
        {
            const char* str_value = rb_id2name(SYM2ID(value));
            int length = (int)strlen(str_value) + 1;
            write_name_and_type(buffer, key, 0x0E);
            SAFE_WRITE(buffer, (char*)&length, 4);
            SAFE_WRITE(buffer, str_value, length);
            break;
        }
    case T_OBJECT:
        {
            // TODO there has to be a better way to do these checks...
            const char* cls = rb_obj_classname(value);
            if (strcmp(cls, "BSON::Binary") == 0 ||
                strcmp(cls, "ByteBuffer") == 0) {
                const char subtype = strcmp(cls, "ByteBuffer") ?
                    (const char)FIX2INT(rb_funcall(value, rb_intern("subtype"), 0)) : 2;
                VALUE string_data = rb_funcall(value, rb_intern("to_s"), 0);
                int length = RSTRING_LENINT(string_data);
                write_name_and_type(buffer, key, 0x05);
                if (subtype == 2) {
                    const int other_length = length + 4;
                    SAFE_WRITE(buffer, (const char*)&other_length, 4);
                    SAFE_WRITE(buffer, &subtype, 1);
                }
                SAFE_WRITE(buffer, (const char*)&length, 4);
                if (subtype != 2) {
                    SAFE_WRITE(buffer, &subtype, 1);
                }
                SAFE_WRITE(buffer, RSTRING_PTR(string_data), length);
                break;
            }
            if (strcmp(cls, "BSON::ObjectId") == 0) {
                VALUE as_array = rb_funcall(value, rb_intern("to_a"), 0);
                int i;
                write_name_and_type(buffer, key, 0x07);
                for (i = 0; i < 12; i++) {
                    char byte = (char)FIX2INT(rb_ary_entry(as_array, i));
                    SAFE_WRITE(buffer, &byte, 1);
                }
                break;
            }
            if (strcmp(cls, "BSON::DBRef") == 0) {
                buffer_position length_location, start_position, obj_length;
                VALUE ns, oid;
                write_name_and_type(buffer, key, 0x03);

                start_position = buffer_get_position(buffer);

                // save space for length
                length_location = buffer_save_space(buffer, 4);
                if (length_location == -1) {
                    rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
                }

                ns = rb_funcall(value, rb_intern("namespace"), 0);
                write_element_with_id(rb_str_new2("$ref"), ns, pack_extra(buffer, Qfalse));
                oid = rb_funcall(value, rb_intern("object_id"), 0);
                write_element_with_id(rb_str_new2("$id"), oid, pack_extra(buffer, Qfalse));

                // write null byte and fill in length
                SAFE_WRITE(buffer, &zero, 1);
                obj_length = buffer_get_position(buffer) - start_position;
                SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&obj_length, 4);
                break;
            }
            if (strcmp(cls, "BSON::Code") == 0) {
                buffer_position length_location, start_position, total_length;
                int length;
                VALUE code_str;
                write_name_and_type(buffer, key, 0x0F);

                start_position = buffer_get_position(buffer);
                length_location = buffer_save_space(buffer, 4);
                if (length_location == -1) {
                    rb_raise(rb_eNoMemError, "failed to allocate memory in buffer.c");
                }

                code_str = rb_funcall(value, rb_intern("code"), 0);
                length = RSTRING_LENINT(code_str) + 1;
                SAFE_WRITE(buffer, (char*)&length, 4);
                SAFE_WRITE(buffer, RSTRING_PTR(code_str), length - 1);
                SAFE_WRITE(buffer, &zero, 1);
                write_doc(buffer, rb_funcall(value, rb_intern("scope"), 0), Qfalse, Qfalse);

                total_length = buffer_get_position(buffer) - start_position;
                SAFE_WRITE_AT_POS(buffer, length_location, (const char*)&total_length, 4);
                break;
            }
            if (strcmp(cls, "BSON::MaxKey") == 0) {
                write_name_and_type(buffer, key, 0x7f);
                break;
            }
            if (strcmp(cls, "BSON::MinKey") == 0) {
                write_name_and_type(buffer, key, 0xff);
                break;
            }
            if (strcmp(cls, "BSON::Timestamp") == 0) {
                write_name_and_type(buffer, key, 0x11);
                int seconds = FIX2INT(
                    rb_funcall(value, rb_intern("seconds"), 0));
                int increment = FIX2INT(
                    rb_funcall(value, rb_intern("increment"), 0));

                SAFE_WRITE(buffer, (const char*)&increment, 4);
                SAFE_WRITE(buffer, (const char*)&seconds, 4);
                break;
            }
            if (strcmp(cls, "DateTime") == 0 || strcmp(cls, "Date") == 0 || strcmp(cls, "ActiveSupport::TimeWithZone") == 0) {
                bson_buffer_free(buffer);
                rb_raise(InvalidDocument, "%s is not currently supported; use a UTC Time instance instead.", cls);
                break;
            }
            if(strcmp(cls, "Complex") == 0 || strcmp(cls, "Rational") == 0 || strcmp(cls, "BigDecimal") == 0) {
                bson_buffer_free(buffer);
                rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
                break;
            }
            bson_buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
            break;
        }
    case T_DATA:
        {
            const char* cls = rb_obj_classname(value);
            if (strcmp(cls, "Time") == 0) {
                double t = NUM2DBL(rb_funcall(value, rb_intern("to_f"), 0));
                long long time_since_epoch = (long long)round(t * 1000);
                write_name_and_type(buffer, key, 0x09);
                SAFE_WRITE(buffer, (const char*)&time_since_epoch, 8);
                break;
            }
            if(strcmp(cls, "BigDecimal") == 0) {
                bson_buffer_free(buffer);
                rb_raise(InvalidDocument, "Cannot serialize the Numeric type %s as BSON; only Bignum, Fixnum, and Float are supported.", cls);
                break;
            }
            bson_buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s into BSON.", cls);
            break;
        }
    case T_REGEXP:
        {
            VALUE pattern = RREGEXP_SRC(value);
            long flags = RREGEXP_OPTIONS(value);
            VALUE has_extra;

            write_name_and_type(buffer, key, 0x0B);

            write_utf8(buffer, pattern, 1);
            SAFE_WRITE(buffer, &zero, 1);

            if (flags & IGNORECASE) {
                char ignorecase = 'i';
                SAFE_WRITE(buffer, &ignorecase, 1);
            }
            if (flags & MULTILINE) {
                char multiline = 'm';
                char dotall = 's';
                SAFE_WRITE(buffer, &multiline, 1);
                SAFE_WRITE(buffer, &dotall, 1);
            }
            if (flags & EXTENDED) {
                char extended = 'x';
                SAFE_WRITE(buffer, &extended, 1);
            }

            has_extra = rb_funcall(value, rb_intern("respond_to?"), 1, rb_str_new2("extra_options_str"));
            if (TYPE(has_extra) == T_TRUE) {
                VALUE extra = rb_funcall(value, rb_intern("extra_options_str"), 0);
                buffer_position old_position = buffer_get_position(buffer);
                SAFE_WRITE(buffer, RSTRING_PTR(extra), RSTRING_LENINT(extra));
                qsort(buffer_get_buffer(buffer) + old_position, RSTRING_LEN(extra), sizeof(char), cmp_char);
            }
            SAFE_WRITE(buffer, &zero, 1);

            break;
        }
    default:
        {
            const char* cls = rb_obj_classname(value);
            bson_buffer_free(buffer);
            rb_raise(InvalidDocument, "Cannot serialize an object of class %s (type %d) into BSON.", cls, TYPE(value));
            break;
        }
    }
    return ST_CONTINUE;
}