/*****************************************************************
Moves the tails of long fields in entry to a big record vector until
rec_get_converted_size(index, entry) drops below
ut_min(page_get_free_space_of_empty(index->table->comp) / 2,
REC_MAX_DATA_SIZE). Only fields at positions
dict_index_get_n_unique_in_tree(index) and above are candidates.
The returned vector and the field data it holds are allocated from a
memory heap that is stored in vector->heap. */

big_rec_t*
dtuple_convert_big_rec(
/*===================*/
				/* out, own: created big record vector,
				NULL if we are not able to shorten
				the entry enough, i.e., if there are
				too many short fields in entry */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in/out: index entry; the length of
				every shortened field is reduced and
				part of its data buffer is overwritten
				with a zero-filled extern field
				reference */
	ulint*		ext_vec,/* in: array of externally stored fields,
				or NULL: if a field already is
				externally stored, then we cannot
				move it to the vector this function
				returns */
	ulint		n_ext_vec)/* in: number of elements is ext_vec */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	ulint		size;
	ulint		n_fields;
	ulint		longest;
	ulint		longest_i;
	ibool		is_externally_stored;
	ulint		i;
	ulint		j;

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	/* The heap is sized for the whole converted record plus one
	big_rec_field_t per tuple field, with 1000 bytes of slack. */
	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: on every round pick the
	longest non-NULL field that is not listed in ext_vec */

	n_fields = 0;

	while (rec_get_converted_size(index, entry)
	       >= ut_min(page_get_free_space_of_empty(
				 index->table->comp) / 2,
			 REC_MAX_DATA_SIZE)) {

		big_rec_field_t*	b;

		longest = 0;
		longest_i = ULINT_MAX;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {

			/* Skip over fields which already are externally
			stored */

			is_externally_stored = FALSE;

			if (ext_vec) {
				for (j = 0; j < n_ext_vec; j++) {
					if (ext_vec[j] == i) {
						is_externally_stored = TRUE;

						/* One match is enough. */
						break;
					}
				}
			}

			if (is_externally_stored) {
				continue;
			}

			dfield = dtuple_get_nth_field(entry, i);

			if (dfield->len != UNIV_SQL_NULL
			    && dfield->len > longest) {

				longest = dfield->len;
				longest_i = i;
			}
		}

		/* We do not store externally fields which are shorter
		than BTR_EXTERN_FIELD_REF_SIZE + 10
		+ DICT_MAX_INDEX_COL_LEN bytes */

		ut_a(DICT_MAX_INDEX_COL_LEN > REC_1BYTE_OFFS_LIMIT);

		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
		    + DICT_MAX_INDEX_COL_LEN) {

			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.
		The first DICT_MAX_INDEX_COL_LEN bytes of the field stay
		in entry, followed by an extern field reference of
		BTR_EXTERN_FIELD_REF_SIZE bytes; only the remaining part
		goes to the big rec vector. Because the first bytes are
		stored locally to the record, all ordering fields in all
		indexes can be calculated from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);

		ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);

		b = &vector->fields[n_fields];

		b->field_no = longest_i;
		b->len = dfield->len - DICT_MAX_INDEX_COL_LEN;
		b->data = mem_heap_alloc(heap, b->len);

		/* Copy data (from the end of field) to big rec vector */

		ut_memcpy(b->data,
			  ((byte*) dfield->data) + DICT_MAX_INDEX_COL_LEN,
			  b->len);

		dfield->len = DICT_MAX_INDEX_COL_LEN
			+ BTR_EXTERN_FIELD_REF_SIZE;

		/* Set the extern field reference in dfield to zero */

		memset(((byte*) dfield->data) + DICT_MAX_INDEX_COL_LEN,
		       0, BTR_EXTERN_FIELD_REF_SIZE);

		n_fields++;
	}

	vector->n_fields = n_fields;

	return(vector);
}
/**************************************************************//**
Moves parts of long fields in entry to the big record vector for as
long as page_zip_rec_needs_ext() reports that the converted record
needs external storage. Only fields at positions
dict_index_get_n_unique_in_tree(index) and above are candidates, i.e.,
fields which are not necessary to determine uniquely the insertion
place of the tuple in the index.
@return own: created big record vector, NULL if index is not a
clustered index or if we are not able to shorten the entry enough,
i.e., if there are too many fixed-length or short fields in entry */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	vec_heap;
	big_rec_t*	big_rec;
	dfield_t*	dfield;
	dict_field_t*	ifield;
	ulint		conv_size;
	ulint		n_moved = 0;
	ulint		local_len;
	ulint		local_prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	/* Table formats older than DICT_TF_FORMAT_ZIP (up to MySQL 5.1)
	keep a 768-byte prefix of the column in the record in front of
	the BLOB pointer; newer formats keep the BLOB pointer only. */
	local_len = dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP
		? BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN
		: BTR_EXTERN_FIELD_REF_SIZE;

	local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

	ut_a(dtuple_check_typed_no_assert(entry));

	conv_size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(conv_size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) conv_size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	vec_heap = mem_heap_create(conv_size + dtuple_get_n_fields(entry)
				   * sizeof(big_rec_field_t) + 1000);

	big_rec = mem_heap_alloc(vec_heap, sizeof(big_rec_t));
	big_rec->heap = vec_heap;
	big_rec->fields = mem_heap_alloc(vec_heap,
					 dtuple_get_n_fields(entry)
					 * sizeof(big_rec_field_t));

	/* On every round, pick the variable-length field that yields
	the biggest savings when stored externally. */

	while (page_zip_rec_needs_ext(
		       rec_get_converted_size(index, entry, *n_ext),
		       dict_table_is_comp(index->table),
		       dict_index_get_n_fields(index),
		       dict_table_zip_size(index->table))) {

		ulint			pos;
		ulint			best_savings = 0;
		ulint			best_pos = ULINT_MAX;
		byte*			local_copy;
		char*			full_data;
		big_rec_field_t*	moved;

		for (pos = dict_index_get_n_unique_in_tree(index);
		     pos < dtuple_get_n_fields(entry); pos++) {

			ulint	len;

			dfield = dtuple_get_nth_field(entry, pos);
			ifield = dict_index_get_nth_field(index, pos);

			/* Fixed-length, NULL and already externally
			stored columns are never candidates. */
			if (ifield->fixed_len
			    || dfield_is_null(dfield)
			    || dfield_is_ext(dfield)) {
				continue;
			}

			len = dfield_get_len(dfield);

			/* Neither are columns that are too short. */
			if (len <= local_len
			    || len <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				continue;
			}

			/* Keep the first column with the strictly
			largest savings. */
			if (len - local_len > best_savings) {
				best_pos = pos;
				best_savings = len - local_len;
			}
		}

		if (best_savings == 0) {
			/* Cannot shorten more */
			mem_heap_free(vec_heap);

			return(NULL);
		}

		/* Move data from field best_pos to big rec vector.
		The first bytes stay local to the record, so that all
		ordering fields in all indexes can be calculated from
		locally stored data. */

		dfield = dtuple_get_nth_field(entry, best_pos);
		/* ifield is looked up for the chosen column as well,
		although nothing below reads it. */
		ifield = dict_index_get_nth_field(index, best_pos);

		full_data = (char*) dfield_get_data(dfield);

		/* The vector entry points into the existing column
		data, just past the locally stored prefix. */
		moved = &big_rec->fields[n_moved];
		moved->field_no = best_pos;
		moved->len = dfield_get_len(dfield) - local_prefix_len;
		moved->data = full_data + local_prefix_len;

		/* Build the locally stored part of the column: the
		prefix followed by a zeroed extern field reference
		(BLOB pointer). */
		local_copy = mem_heap_alloc(vec_heap, local_len);
		memcpy(local_copy, full_data, local_prefix_len);
		memset(local_copy + local_prefix_len, 0,
		       BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(data + local_prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif
		dfield_set_data(dfield, local_copy, local_len);
		dfield_set_ext(dfield);

		n_moved++;
		(*n_ext)++;
		ut_ad(n_moved < dtuple_get_n_fields(entry));
	}

	big_rec->n_fields = n_moved;

	return(big_rec);
}