/*
 * Build a single-record response message and append it to a buf builder.
 *
 * Layout written: as_msg header, digest field, namespace field, optional
 * set-name field, optional key field, then - unless nobindata is set - one
 * read op per returned bin.
 *
 * Parameters:
 *   r           - index entry; supplies generation, void-time, set id, and
 *                 the digest when rd is NULL
 *   rd          - storage record; may be NULL only when nobindata is true
 *                 and nsname is given
 *   bb_r        - buf builder in which the message space is reserved
 *   nobindata   - send metadata only, no bin ops
 *   nsname      - namespace name, used when rd is NULL
 *   use_sets    - add the set-name field if the record belongs to a set
 *   include_key - add the key field if the record has a stored key
 *   binlist     - optional vector of bin names to project; NULL means all
 *                 in-use bins
 *
 * Returns: 0 on success (or when the record is skipped by the sanity
 *          check), -1 if the namespace or the stored key can't be obtained.
 */
int
as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd,
		cf_buf_builder **bb_r, bool nobindata, char *nsname, bool use_sets,
		bool include_key, cf_vector *binlist)
{
	// Sanity checks. Either rd should be there or nobindata and nsname should
	// be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return 0;
	}

	// Figure out the size of the entire buffer.
	int set_name_len = 0;
	const char *set_name = NULL;
	int ns_len = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (use_sets && as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		}
		else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}

		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}

		set_name = as_index_get_set_name(r, ns);

		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	uint8_t* key = NULL;
	uint32_t key_size = 0;

	// The stored key is read through the storage record, so it can only be
	// included when rd is present. The metadata-only path (rd == NULL) sends
	// no key field instead of dereferencing a NULL rd.
	if (rd && include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) {
		if (! as_storage_record_get_key(rd)) {
			cf_info(AS_PROTO, "can't get key - skipping record");
			return -1;
		}

		key = rd->key;
		key_size = rd->key_size;
	}

	// Digest and namespace fields are always present.
	uint16_t n_fields = 2;
	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;

	if (set_name) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}

	if (key) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + key_size;
	}

	int list_bins = 0;
	int in_use_bins = 0;

	if (rd) {
		in_use_bins = as_bin_inuse_count(rd);
	}

	// First pass: add up the space needed for the bin ops.
	if (nobindata == false) {
		if (binlist) {
			int binlist_sz = cf_vector_size(binlist);

			for (uint16_t i = 0; i < binlist_sz; i++) {
				char binname[AS_ID_BIN_SZ];

				cf_vector_get(binlist, i, (void*)&binname);
				cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);

				as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));

				if (!p_bin) {
					cf_debug(AS_PROTO, "To be projected bin |%s| not found \n", binname);
					continue;
				}

				// Format string previously had two %s conversions for a
				// single argument.
				cf_debug(AS_PROTO, "Adding bin |%s| to projected bins \n", binname);
				list_bins++;
				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);

				uint32_t psz = 0;

				// Hidden bins contribute no particle bytes.
				if (! as_bin_is_hidden(p_bin)) {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}

				msg_sz += psz;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * in_use_bins; // the bin headers

			for (uint16_t i = 0; i < in_use_bins; i++) {
				as_bin *p_bin = &rd->bins[i];

				msg_sz += rd->ns->single_bin ? 0 :
						strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));

				uint32_t psz = 0;

				// Hidden bins contribute no particle bytes.
				if (! as_bin_is_hidden(p_bin)) {
					as_particle_tobuf(p_bin, 0, &psz); // get size
				}

				msg_sz += psz;
			}
		}
	}

	uint8_t *b;

	cf_buf_builder_reserve(bb_r, msg_sz, &b);

	// Set up the header.
	uint8_t *buf = b;
	as_msg *msgp = (as_msg *) buf;

	msgp->header_sz = sizeof(as_msg);
	msgp->info1 = (nobindata ? AS_MSG_INFO1_GET_NOBINDATA : 0);
	msgp->info2 = 0;
	msgp->info3 = 0;
	msgp->unused = 0;
	msgp->result_code = 0;
	msgp->generation = r->generation;
	msgp->record_ttl = r->void_time;
	msgp->transaction_ttl = 0;
	msgp->n_fields = n_fields;

	// NOTE(review): with rd present and nobindata set, n_ops is still the
	// bin count (or 0 with a binlist) although no ops are written below -
	// confirm readers key off the NOBINDATA flag.
	if (rd) {
		if (binlist)
			msgp->n_ops = list_bins;
		else
			msgp->n_ops = in_use_bins;
	}
	else {
		msgp->n_ops = 0;
	}

	as_msg_swap_header(msgp);
	buf += sizeof(as_msg);

	// Digest field. Each field_sz below is the data length + 1 (presumably
	// accounting for the type byte).
	as_msg_field *mf = (as_msg_field *) buf;

	mf->field_sz = sizeof(cf_digest) + 1;
	mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE;

	if (rd) {
		memcpy(mf->data, &rd->keyd, sizeof(cf_digest));
	}
	else {
		memcpy(mf->data, &r->key, sizeof(cf_digest));
	}

	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + sizeof(cf_digest);

	// Namespace field.
	mf = (as_msg_field *) buf;
	mf->field_sz = ns_len + 1;
	mf->type = AS_MSG_FIELD_TYPE_NAMESPACE;

	if (rd) {
		memcpy(mf->data, rd->ns->name, ns_len);
	}
	else {
		memcpy(mf->data, nsname, ns_len);
	}

	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + ns_len;

	// Optional set-name field.
	if (set_name) {
		mf = (as_msg_field *) buf;
		mf->field_sz = set_name_len + 1;
		mf->type = AS_MSG_FIELD_TYPE_SET;
		memcpy(mf->data, set_name, set_name_len);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + set_name_len;
	}

	// Optional key field.
	if (key) {
		mf = (as_msg_field *) buf;
		mf->field_sz = key_size + 1;
		mf->type = AS_MSG_FIELD_TYPE_KEY;
		memcpy(mf->data, key, key_size);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + key_size;
	}

	// Metadata only - nothing more to write.
	if (nobindata) {
		return 0;
	}

	// Second pass: write the bin ops.
	if (binlist) {
		int binlist_sz = cf_vector_size(binlist);

		for (uint16_t i = 0; i < binlist_sz; i++) {
			char binname[AS_ID_BIN_SZ];

			cf_vector_get(binlist, i, (void*)&binname);
			cf_debug(AS_PROTO, " Binname projected inside is |%s| \n", binname);

			as_bin *p_bin = as_bin_get (rd, (uint8_t*)binname, strlen(binname));

			// Bins not found were not counted in the sizing pass either.
			if (!p_bin) {
				continue;
			}

			as_msg_op *op = (as_msg_op *)buf;

			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, the particle size
			// is added below.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(p_bin)) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(p_bin));
				op->version = as_bin_get_version(p_bin, rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety

				if (as_bin_is_hidden(p_bin)) {
					op->particle_type = AS_PARTICLE_TYPE_NULL;
					psz = 0;
				}
				else if (0 != as_particle_tobuf(p_bin, buf, &psz)) {
					cf_warning(AS_PROTO, "particle to buf: could not copy data!");
				}

				buf += psz;
				op->op_sz += psz;
			}
			else {
				cf_debug(AS_PROTO, "Whoops !! bin not in use");
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}

			as_msg_swap_op(op);
		}
	}
	else {
		// Over all bins, copy into the buffer.
		for (uint16_t i = 0; i < in_use_bins; i++) {
			as_bin *p_bin = &rd->bins[i];
			as_msg_op *op = (as_msg_op *)buf;

			buf += sizeof(as_msg_op);

			op->op = AS_MSG_OP_READ;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			buf += op->name_sz;

			// Since there are two variable bits, the size is everything after
			// the data bytes - and this is only the head, the particle size
			// is added below.
			op->op_sz = 4 + op->name_sz;

			if (as_bin_inuse(p_bin)) {
				op->particle_type = as_particle_type_convert(as_bin_get_particle_type(p_bin));
				op->version = as_bin_get_version(p_bin, rd->ns->single_bin);

				uint32_t psz = msg_sz - (buf - b); // size remaining in buffer, for safety

				if (as_bin_is_hidden(p_bin)) {
					op->particle_type = AS_PARTICLE_TYPE_NULL;
					psz = 0;
				}
				else if (0 != as_particle_tobuf(p_bin, buf, &psz)) {
					cf_warning(AS_PROTO, "particle to buf: could not copy data!");
				}

				buf += psz;
				op->op_sz += psz;
			}
			else {
				op->particle_type = AS_PARTICLE_TYPE_NULL;
			}

			as_msg_swap_op(op);
		}
	}

	return 0;
}
/*
 * Build a single-record response message and append it to a buf builder.
 *
 * Layout written: as_msg header, digest field, namespace field, optional
 * set-name field, optional key field, then - unless nobindata is set - one
 * read op per returned bin.
 *
 * Parameters:
 *   r                  - index entry; supplies generation, void-time, set
 *                        id, and the digest when rd is NULL
 *   rd                 - storage record; may be NULL only when nobindata is
 *                        true and nsname is given
 *   bb_r               - buf builder in which the message space is reserved
 *   nobindata          - send metadata only, no bin ops
 *   nsname             - namespace name, used when rd is NULL
 *   include_ldt_data   - for hidden bins, size and send the LDT client value
 *   include_key        - add the key field if the record has a stored key
 *   skip_empty_records - with a binlist, write nothing if no listed bin
 *                        was found
 *   binlist            - optional vector of bin names to project; NULL
 *                        means all in-use bins
 *
 * Returns: 0 on success (or when the record is skipped), -1 if the
 *          namespace or the stored key can't be obtained.
 */
int
as_msg_make_response_bufbuilder(as_record *r, as_storage_rd *rd,
		cf_buf_builder **bb_r, bool nobindata, char *nsname,
		bool include_ldt_data, bool include_key, bool skip_empty_records,
		cf_vector *binlist)
{
	// Sanity checks. Either rd should be there or nobindata and nsname should
	// be present.
	if (!(rd || (nobindata && nsname))) {
		cf_detail(AS_PROTO, "Neither storage record nor nobindata is set. Skipping the record.");
		return 0;
	}

	// Figure out the size of the entire buffer.
	int set_name_len = 0;
	const char *set_name = NULL;
	int ns_len = rd ? strlen(rd->ns->name) : strlen(nsname);

	if (as_index_get_set_id(r) != INVALID_SET_ID) {
		as_namespace *ns = NULL;

		if (rd) {
			ns = rd->ns;
		}
		else if (nsname) {
			ns = as_namespace_get_byname(nsname);
		}

		if (!ns) {
			cf_info(AS_PROTO, "Cannot get namespace, needed to get set information. Skipping record.");
			return -1;
		}

		set_name = as_index_get_set_name(r, ns);

		if (set_name) {
			set_name_len = strlen(set_name);
		}
	}

	uint8_t* key = NULL;
	uint32_t key_size = 0;

	// The stored key is read through the storage record, so it can only be
	// included when rd is present. The metadata-only path (rd == NULL) sends
	// no key field instead of dereferencing a NULL rd.
	if (rd && include_key && as_index_is_flag_set(r, AS_INDEX_FLAG_KEY_STORED)) {
		if (! as_storage_record_get_key(rd)) {
			cf_info(AS_PROTO, "can't get key - skipping record");
			return -1;
		}

		key = rd->key;
		key_size = rd->key_size;
	}

	// Digest and namespace fields are always present.
	uint16_t n_fields = 2;
	int msg_sz = sizeof(as_msg);
	msg_sz += sizeof(as_msg_field) + sizeof(cf_digest);
	msg_sz += sizeof(as_msg_field) + ns_len;

	if (set_name) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + set_name_len;
	}

	if (key) {
		n_fields++;
		msg_sz += sizeof(as_msg_field) + key_size;
	}

	int list_bins = 0;
	int in_use_bins = rd ? (int)as_bin_inuse_count(rd) : 0;
	int binlist_sz = (! nobindata && binlist) ? (int)cf_vector_size(binlist) : 0;

	// One slot per op that may be written. On the binlist path slots are
	// indexed by the count of matched names, which is bounded by the binlist
	// size, not by in_use_bins (a name could be listed more than once). The
	// length is also kept >= 1 since a zero-length VLA is undefined.
	int n_val_slots = binlist_sz > in_use_bins ? binlist_sz : in_use_bins;

	if (n_val_slots < 1) {
		n_val_slots = 1;
	}

	as_val *ldt_bin_vals[n_val_slots];

	// First pass: add up the space needed for the bin ops.
	if (! nobindata) {
		if (binlist) {
			for (uint16_t i = 0; i < binlist_sz; i++) {
				char binname[AS_ID_BIN_SZ];

				cf_vector_get(binlist, i, (void*)&binname);

				as_bin *p_bin = as_bin_get(rd, binname);

				if (! p_bin) {
					continue;
				}

				msg_sz += sizeof(as_msg_op);
				msg_sz += rd->ns->single_bin ? 0 : strlen(binname);

				if (as_bin_is_hidden(p_bin)) {
					if (include_ldt_data) {
						msg_sz += (int)as_ldt_particle_client_value_size(rd, p_bin, &ldt_bin_vals[list_bins]);
					}
					else {
						ldt_bin_vals[list_bins] = NULL;
					}
				}
				else {
					msg_sz += (int)as_bin_particle_client_value_size(p_bin);
				}

				list_bins++;
			}

			// Don't return an empty record.
			if (skip_empty_records && list_bins == 0) {
				return 0;
			}
		}
		else {
			msg_sz += sizeof(as_msg_op) * in_use_bins;

			for (uint16_t i = 0; i < in_use_bins; i++) {
				as_bin *p_bin = &rd->bins[i];

				msg_sz += rd->ns->single_bin ? 0 :
						strlen(as_bin_get_name_from_id(rd->ns, p_bin->id));

				if (as_bin_is_hidden(p_bin)) {
					if (include_ldt_data) {
						msg_sz += (int)as_ldt_particle_client_value_size(rd, p_bin, &ldt_bin_vals[i]);
					}
					else {
						ldt_bin_vals[i] = NULL;
					}
				}
				else {
					msg_sz += (int)as_bin_particle_client_value_size(p_bin);
				}
			}
		}
	}

	uint8_t *b;

	cf_buf_builder_reserve(bb_r, msg_sz, &b);

	// Set up the header.
	uint8_t *buf = b;
	as_msg *msgp = (as_msg *) buf;

	msgp->header_sz = sizeof(as_msg);
	msgp->info1 = (nobindata ? AS_MSG_INFO1_GET_NOBINDATA : 0);
	msgp->info2 = 0;
	msgp->info3 = 0;
	msgp->unused = 0;
	msgp->result_code = 0;
	msgp->generation = r->generation;
	msgp->record_ttl = r->void_time;
	msgp->transaction_ttl = 0;
	msgp->n_fields = n_fields;

	// NOTE(review): with rd present and nobindata set, n_ops is still the
	// bin count (or 0 with a binlist) although no ops are written below -
	// confirm readers key off the NOBINDATA flag.
	if (rd) {
		if (binlist)
			msgp->n_ops = list_bins;
		else
			msgp->n_ops = in_use_bins;
	}
	else {
		msgp->n_ops = 0;
	}

	as_msg_swap_header(msgp);
	buf += sizeof(as_msg);

	// Digest field. Each field_sz below is the data length + 1 (presumably
	// accounting for the type byte).
	as_msg_field *mf = (as_msg_field *) buf;

	mf->field_sz = sizeof(cf_digest) + 1;
	mf->type = AS_MSG_FIELD_TYPE_DIGEST_RIPE;

	if (rd) {
		memcpy(mf->data, &rd->keyd, sizeof(cf_digest));
	}
	else {
		memcpy(mf->data, &r->key, sizeof(cf_digest));
	}

	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + sizeof(cf_digest);

	// Namespace field.
	mf = (as_msg_field *) buf;
	mf->field_sz = ns_len + 1;
	mf->type = AS_MSG_FIELD_TYPE_NAMESPACE;

	if (rd) {
		memcpy(mf->data, rd->ns->name, ns_len);
	}
	else {
		memcpy(mf->data, nsname, ns_len);
	}

	as_msg_swap_field(mf);
	buf += sizeof(as_msg_field) + ns_len;

	// Optional set-name field.
	if (set_name) {
		mf = (as_msg_field *) buf;
		mf->field_sz = set_name_len + 1;
		mf->type = AS_MSG_FIELD_TYPE_SET;
		memcpy(mf->data, set_name, set_name_len);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + set_name_len;
	}

	// Optional key field.
	if (key) {
		mf = (as_msg_field *) buf;
		mf->field_sz = key_size + 1;
		mf->type = AS_MSG_FIELD_TYPE_KEY;
		memcpy(mf->data, key, key_size);
		as_msg_swap_field(mf);
		buf += sizeof(as_msg_field) + key_size;
	}

	// Metadata only - nothing more to write.
	if (nobindata) {
		return 0;
	}

	// Second pass: write the bin ops, visiting bins in the same order as the
	// sizing pass so the ldt_bin_vals slots line up.
	if (binlist) {
		list_bins = 0;

		for (uint16_t i = 0; i < binlist_sz; i++) {
			char binname[AS_ID_BIN_SZ];

			cf_vector_get(binlist, i, (void*)&binname);

			as_bin *p_bin = as_bin_get(rd, binname);

			if (! p_bin) {
				continue;
			}

			as_msg_op *op = (as_msg_op *)buf;

			op->op = AS_MSG_OP_READ;
			op->version = 0;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			op->op_sz = 4 + op->name_sz;

			buf += sizeof(as_msg_op) + op->name_sz;

			if (as_bin_is_hidden(p_bin)) {
				buf += as_ldt_particle_to_client(ldt_bin_vals[list_bins], op);
			}
			else {
				buf += as_bin_particle_to_client(p_bin, op);
			}

			list_bins++;

			as_msg_swap_op(op);
		}
	}
	else {
		for (uint16_t i = 0; i < in_use_bins; i++) {
			as_bin *p_bin = &rd->bins[i];
			as_msg_op *op = (as_msg_op *)buf;

			op->op = AS_MSG_OP_READ;
			op->version = 0;
			op->name_sz = as_bin_memcpy_name(rd->ns, op->name, p_bin);
			op->op_sz = 4 + op->name_sz;

			buf += sizeof(as_msg_op) + op->name_sz;

			if (as_bin_is_hidden(p_bin)) {
				buf += as_ldt_particle_to_client(ldt_bin_vals[i], op);
			}
			else {
				buf += as_bin_particle_to_client(p_bin, op);
			}

			as_msg_swap_op(op);
		}
	}

	return 0;
}
/* Internal Function: Does the post processing for the UDF record after the
 *                    UDF execution. Does the following:
 *     1. Determines the resulting operation type from the record flags
 *     2. urecord_op is updated to delete in case there is no bin left in the
 *        record (the record is also removed from the index tree)
 *     3. urecord->pickled_buf is populated before the record is closed, in
 *        case it was a write operation
 *     4. Record is closed
 *     5. The write / delete is handed to XDR after the record is closed
 *     6. UDF updates cache is cleared
 *
 * Returns: Nothing
 *
 * Parameters: urecord          - UDF record to operate on
 *             urecord_op (out) - Populated with the optype
 *             set_id           - Set id passed to XDR when the record is not
 *                                open; if the record is open, the set id
 *                                read from the index is used instead
 */
void
udf_rw_post_processing(udf_record *urecord, udf_optype *urecord_op, uint16_t set_id)
{
	as_storage_rd  *rd    = urecord->rd;
	as_transaction *tr    = urecord->tr;
	as_index_ref   *r_ref = urecord->r_ref;

	// INIT
	urecord->pickled_buf       = NULL;
	urecord->pickled_sz        = 0;
	urecord->pickled_void_time = 0;
	as_rec_props_clear(&urecord->pickled_rec_props);
	bool udf_xdr_ship_op = false;

	// TODO: optimize not to allocate buffer if it is single
	// node cluster. No remote to send data to

	// Check if UDF has updates.
	if (urecord->flag & UDF_RECORD_FLAG_HAS_UPDATES) {
		// Check if the record is not deleted after an update
		if (urecord->flag & UDF_RECORD_FLAG_OPEN) {
			*urecord_op = UDF_OPTYPE_WRITE;
			udf_xdr_ship_op = true;
		}
		else {
			// If the record has updates and it is not open,
			// and if it pre-existed it's an update followed by a delete.
			if (urecord->flag & UDF_RECORD_FLAG_PREEXISTS) {
				*urecord_op = UDF_OPTYPE_DELETE;
				udf_xdr_ship_op = true;
			}
			// If the record did not pre-exist and is updated
			// and it is not open, then it is create followed by
			// delete essentially no_op.
			else {
				*urecord_op = UDF_OPTYPE_NONE;
			}
		}
	}
	else if ((urecord->flag & UDF_RECORD_FLAG_PREEXISTS)
			&& !(urecord->flag & UDF_RECORD_FLAG_OPEN)) {
		*urecord_op = UDF_OPTYPE_DELETE;
		udf_xdr_ship_op = true;
	}
	else {
		*urecord_op = UDF_OPTYPE_READ;
	}

	// Log the record pointer itself, not the address of the local parameter.
	cf_detail(AS_UDF, "FINISH working with LDT Record %p %p %p %p %d", (void *)urecord,
			urecord->tr, urecord->r_ref, urecord->rd,
			(urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN));

	// If there exists a record reference but no bin of the record is in use,
	// delete the record. remove from the tree. Only LDT_RECORD here not needed
	// for LDT_SUBRECORD (only do it if requested by UDF). All the SUBRECORD of
	// removed LDT_RECORD will be lazily cleaned up by defrag.
	if (!(urecord->flag & UDF_RECORD_FLAG_IS_SUBRECORD)
			&& urecord->flag & UDF_RECORD_FLAG_OPEN
			&& !as_bin_inuse_has(rd)) {
		as_index_delete(tr->rsv.tree, &tr->keyd);
		urecord->starting_memory_bytes = 0;
		*urecord_op = UDF_OPTYPE_DELETE;
		udf_xdr_ship_op = true;
	}
	else if (*urecord_op == UDF_OPTYPE_WRITE) {
		// The digest is not an integer, so it can't be passed to a PRIx64
		// conversion directly - log its leading bytes instead.
		uint64_t rd_keyd_prefix = 0;

		memcpy(&rd_keyd_prefix, &rd->keyd,
				sizeof(rd->keyd) < sizeof(rd_keyd_prefix) ?
						sizeof(rd->keyd) : sizeof(rd_keyd_prefix));

		cf_detail(AS_UDF, "Committing Changes %"PRIx64" n_bins %d", rd_keyd_prefix,
				as_bin_get_n_bins(r_ref->r, rd));

		size_t rec_props_data_size = as_storage_record_rec_props_size(rd);

		// A zero-length VLA is undefined, so keep at least one byte; the
		// buffer is handed to rd only when there are props to store.
		// NOTE(review): this buffer goes out of scope at the end of this
		// branch - confirm rd does not use it during udf_record_close().
		uint8_t rec_props_data[rec_props_data_size > 0 ? rec_props_data_size : 1];

		if (rec_props_data_size > 0) {
			as_storage_record_set_rec_props(rd, rec_props_data);
		}

		write_local_post_processing(tr, tr->rsv.ns, NULL, &urecord->pickled_buf,
				&urecord->pickled_sz, &urecord->pickled_void_time,
				&urecord->pickled_rec_props, true/*increment_generation*/,
				NULL, r_ref->r, rd, urecord->starting_memory_bytes);

		// Now ok to accommodate a new stored key...
		if (! as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_allocate_key(r_ref->r, rd->key, rd->key_size);
			}

			as_index_set_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
		// ... or drop a stored key.
		else if (as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && ! rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_remove_key(r_ref->r);
			}

			as_index_clear_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
	}

	// Collect the record information (for XDR) before closing the record
	as_generation generation = 0;

	if (urecord->flag & UDF_RECORD_FLAG_OPEN) {
		generation = r_ref->r->generation;
		set_id = as_index_get_set_id(r_ref->r);
	}

	// Close the record for all the cases
	udf_record_close(urecord, false);

	// Write to XDR pipe after closing the record, in order to release the
	// record lock as early as possible.
	if (udf_xdr_ship_op == true) {
		// Leading bytes of the digest, for logging only.
		uint64_t tr_keyd_prefix = 0;

		memcpy(&tr_keyd_prefix, &tr->keyd,
				sizeof(tr->keyd) < sizeof(tr_keyd_prefix) ?
						sizeof(tr->keyd) : sizeof(tr_keyd_prefix));

		if (UDF_OP_IS_WRITE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF write shipping for key %" PRIx64, tr_keyd_prefix);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, false, set_id);
		}
		else if (UDF_OP_IS_DELETE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF delete shipping for key %" PRIx64, tr_keyd_prefix);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, true, set_id);
		}
	}

	// Replication happens when the main record replicates
	if (urecord->particle_data) {
		cf_free(urecord->particle_data);
		urecord->particle_data = 0;
	}

	udf_record_cache_free(urecord);
}
/* Internal Function: Does the post processing for the UDF record after the
 *                    UDF execution. Does the following:
 *     1. Determines the resulting operation type via getop()
 *     2. urecord_op is updated to delete in case there is no bin left in the
 *        record (the record is also removed from the index tree)
 *     3. urecord->pickled_buf is populated before the record is closed, in
 *        case it was a write operation
 *     4. The final optype is stored in urecord->op and the record is closed
 *     5. The write / delete is handed to XDR after the record is closed
 *
 * Returns: Nothing
 *
 * Parameters: urecord          - UDF record to operate on
 *             urecord_op (out) - Populated with the optype
 *             set_id           - Set id passed to XDR when the record is not
 *                                open; if the record is open, the set id
 *                                read from the index is used instead
 */
static void
post_processing(udf_record *urecord, udf_optype *urecord_op, uint16_t set_id)
{
	as_storage_rd  *rd    = urecord->rd;
	as_transaction *tr    = urecord->tr;
	as_index_ref   *r_ref = urecord->r_ref;

	// INIT
	urecord->pickled_buf = NULL;
	urecord->pickled_sz  = 0;
	as_rec_props_clear(&urecord->pickled_rec_props);
	bool udf_xdr_ship_op = false;

	getop(urecord, urecord_op);

	if (UDF_OP_IS_DELETE(*urecord_op) || UDF_OP_IS_WRITE(*urecord_op)) {
		udf_xdr_ship_op = true;
	}

	// Log the record pointer itself, not the address of the local parameter.
	cf_detail(AS_UDF, "FINISH working with LDT Record %p %p %p %p %d", (void *)urecord,
			urecord->tr, urecord->r_ref, urecord->rd,
			(urecord->flag & UDF_RECORD_FLAG_STORAGE_OPEN));

	// If there exists a record reference but no bin of the record is in use,
	// delete the record. remove from the tree. Only LDT_RECORD here not needed
	// for LDT_SUBRECORD (only do it if requested by UDF). All the SUBRECORD of
	// removed LDT_RECORD will be lazily cleaned up by defrag.
	if (udf_zero_bins_left(urecord)) {
		// NOTE(review): udf_xdr_ship_op was derived from getop()'s result
		// above and is not revisited here, so a delete forced by this branch
		// is shipped only if getop() already reported a write or delete -
		// confirm that is intended.
		as_index_delete(tr->rsv.tree, &tr->keyd);
		urecord->starting_memory_bytes = 0;
		*urecord_op = UDF_OPTYPE_DELETE;
	}
	else if (*urecord_op == UDF_OPTYPE_WRITE) {
		cf_detail_digest(AS_UDF, &rd->keyd, "Committing Changes n_bins %d",
				as_bin_get_n_bins(r_ref->r, rd));

		size_t rec_props_data_size = as_storage_record_rec_props_size(rd);

		// A zero-length VLA is undefined, so keep at least one byte; the
		// buffer is handed to rd only when there are props to store.
		// NOTE(review): this buffer goes out of scope at the end of this
		// branch - confirm rd does not use it during udf_record_close().
		uint8_t rec_props_data[rec_props_data_size > 0 ? rec_props_data_size : 1];

		if (rec_props_data_size > 0) {
			as_storage_record_set_rec_props(rd, rec_props_data);
		}

		write_udf_post_processing(tr, rd, &urecord->pickled_buf,
				&urecord->pickled_sz, &urecord->pickled_rec_props,
				urecord->starting_memory_bytes);

		// Now ok to accommodate a new stored key...
		if (! as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_allocate_key(r_ref->r, rd->key, rd->key_size);
			}

			as_index_set_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
		// ... or drop a stored key.
		else if (as_index_is_flag_set(r_ref->r, AS_INDEX_FLAG_KEY_STORED) && ! rd->key) {
			if (rd->ns->storage_data_in_memory) {
				as_record_remove_key(r_ref->r);
			}

			as_index_clear_flags(r_ref->r, AS_INDEX_FLAG_KEY_STORED);
		}
	}

	// Collect the record information (for XDR) before closing the record
	as_generation generation = 0;

	if (urecord->flag & UDF_RECORD_FLAG_OPEN) {
		generation = r_ref->r->generation;
		set_id = as_index_get_set_id(r_ref->r);
	}

	urecord->op = *urecord_op;

	// Close the record for all the cases
	udf_record_close(urecord);

	// Write to XDR pipe after closing the record, in order to release the
	// record lock as early as possible.
	if (udf_xdr_ship_op == true) {
		// The digest is not an integer, so it can't be passed to a PRIx64
		// conversion directly - log its leading bytes instead.
		uint64_t tr_keyd_prefix = 0;

		memcpy(&tr_keyd_prefix, &tr->keyd,
				sizeof(tr->keyd) < sizeof(tr_keyd_prefix) ?
						sizeof(tr->keyd) : sizeof(tr_keyd_prefix));

		if (UDF_OP_IS_WRITE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF write shipping for key %" PRIx64, tr_keyd_prefix);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, false, set_id);
		}
		else if (UDF_OP_IS_DELETE(*urecord_op)) {
			cf_detail(AS_UDF, "UDF delete shipping for key %" PRIx64, tr_keyd_prefix);
			xdr_write(tr->rsv.ns, tr->keyd, generation, 0, true, set_id);
		}
	}
}