ibool
dtuple_check_typed_no_assert(
/*=========================*/
				/* out: TRUE if ok */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		i;

	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
		fprintf(stderr,
			"InnoDB: Error: index entry has %lu fields\n",
			(ulong) dtuple_get_n_fields(tuple));
dump:
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, tuple);
		putc('\n', stderr);

		return(FALSE);
	}

	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {

		field = dtuple_get_nth_field(tuple, i);

		if (!dfield_check_typed_no_assert(field)) {
			goto dump;
		}
	}

	return(TRUE);
}
upd_t*
row_upd_build_sec_rec_difference_binary(
/*====================================*/
				/* out, own: update vector of differing
				fields */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: entry to insert */
	rec_t*		rec,	/* in: secondary index record */
	mem_heap_t*	heap)	/* in: memory heap from which allocated */
{
	upd_field_t*	upd_field;
	dfield_t*	dfield;
	byte*		data;
	ulint		len;
	upd_t*		update;
	ulint		n_diff;
	ulint		i;

	/* This function is used only for a secondary index */
	ut_ad(0 == (index->type & DICT_CLUSTERED));

	update = upd_create(dtuple_get_n_fields(entry), heap);

	n_diff = 0;

	for (i = 0; i < dtuple_get_n_fields(entry); i++) {

		data = rec_get_nth_field(rec, i, &len);

		dfield = dtuple_get_nth_field(entry, i);

		ut_a(len == dfield_get_len(dfield));

		/* NOTE: we compare the fields as binary strings!
		(No collation) */

		if (!dfield_data_is_binary_equal(dfield, len, data)) {

			upd_field = upd_get_nth_field(update, n_diff);

			dfield_copy(&(upd_field->new_val), dfield);

			upd_field_set_field_no(upd_field, i, index);

			upd_field->extern_storage = FALSE;

			n_diff++;
		}
	}

	update->n_fields = n_diff;

	return(update);
}
/**************************************************************//**
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record.
@return TRUE if prefix */
UNIV_INTERN
ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
	const dtuple_t*	dtuple,	/*!< in: data tuple */
	const rec_t*	rec,	/*!< in: physical record */
	const ulint*	offsets)/*!< in: array returned by rec_get_offsets() */
{
	ulint	n_fields;
	ulint	matched_fields	= 0;
	ulint	matched_bytes	= 0;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	n_fields = dtuple_get_n_fields(dtuple);

	if (n_fields > rec_offs_n_fields(offsets)) {

		return(FALSE);
	}

	cmp_dtuple_rec_with_match(dtuple, rec, offsets,
				  &matched_fields, &matched_bytes);
	if (matched_fields == n_fields) {

		return(TRUE);
	}

	if (matched_fields == n_fields - 1
	    && matched_bytes == dfield_get_len(
		    dtuple_get_nth_field(dtuple, n_fields - 1))) {
		return(TRUE);
	}

	return(FALSE);
}
void
dtuple_print(
/*=========*/
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		n_fields;
	ulint		i;

	n_fields = dtuple_get_n_fields(tuple);

	printf("DATA TUPLE: %lu fields;\n", n_fields);

	for (i = 0; i < n_fields; i++) {
		printf(" %lu:", i);

		field = dtuple_get_nth_field(tuple, i);

		if (field->len != UNIV_SQL_NULL) {
			ut_print_buf(field->data, field->len);
		} else {
			printf(" SQL NULL");
		}

		printf(";");
	}

	printf("\n");

	dtuple_validate(tuple);
}
/***************************************************************//**
Searches an index record.
@return TRUE if found */
UNIV_INTERN
ibool
row_search_index_entry(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	entry,	/*!< in: index entry */
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor, which must
				be closed by the caller */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint	n_fields;
	ulint	low_match;
	rec_t*	rec;

	ut_ad(dtuple_check_typed(entry));

	btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
	low_match = btr_pcur_get_low_match(pcur);

	rec = btr_pcur_get_rec(pcur);

	n_fields = dtuple_get_n_fields(entry);

	return(!page_rec_is_infimum(rec) && low_match == n_fields);
}
/**********************************************************//**
The following function prints the contents of a tuple. */
UNIV_INTERN
void
dtuple_print(
/*=========*/
	FILE*		f,	/*!< in: output stream */
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	ulint		n_fields;
	ulint		i;

	n_fields = dtuple_get_n_fields(tuple);

	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);

	for (i = 0; i < n_fields; i++) {
		fprintf(f, " %lu:", (ulong) i);

		dfield_print_raw(f, dtuple_get_nth_field(tuple, i));

		putc(';', f);
		putc('\n', f);
	}

	ut_ad(dtuple_validate(tuple));
}
void
dtuple_print(
/*=========*/
	FILE*		f,	/* in: output stream */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		n_fields;
	ulint		i;

	n_fields = dtuple_get_n_fields(tuple);

	fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields);

	for (i = 0; i < n_fields; i++) {
		fprintf(f, " %lu:", (ulong) i);

		field = dtuple_get_nth_field(tuple, i);

		if (field->len != UNIV_SQL_NULL) {
			ut_print_buf(f, field->data, field->len);
		} else {
			fputs(" SQL NULL", f);
		}

		putc(';', f);
	}

	putc('\n', f);

	ut_ad(dtuple_validate(tuple));
}
ibool
row_search_on_row_ref(
/*==================*/
				/* out: TRUE if found */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
				be closed by the caller */
	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
	dict_table_t*	table,	/* in: table */
	dtuple_t*	ref,	/* in: row reference */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint		low_match;
	rec_t*		rec;
	dict_index_t*	index;
	page_t*		page;

	ut_ad(dtuple_check_typed(ref));

	index = dict_table_get_first_index(table);

	ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index));

	btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr);

	low_match = btr_pcur_get_low_match(pcur);

	rec = btr_pcur_get_rec(pcur);
	page = buf_frame_align(rec);

	if (rec == page_get_infimum_rec(page)) {

		return(FALSE);
	}

	if (low_match != dtuple_get_n_fields(ref)) {

		return(FALSE);
	}

	return(TRUE);
}
ibool
dtuple_datas_are_ordering_equal(
/*============================*/
				/* out: TRUE if length and fields are equal
				when compared with cmp_data_data:
				NOTE: in character type fields some letters
				are identified with others! (collation) */
	dtuple_t*	tuple1,	/* in: tuple 1 */
	dtuple_t*	tuple2)	/* in: tuple 2 */
{
	dfield_t*	field1;
	dfield_t*	field2;
	ulint		n_fields;
	ulint		i;

	ut_ad(tuple1 && tuple2);
	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple1));
	ut_ad(dtuple_check_typed(tuple2));

	n_fields = dtuple_get_n_fields(tuple1);

	if (n_fields != dtuple_get_n_fields(tuple2)) {

		return(FALSE);
	}

	for (i = 0; i < n_fields; i++) {
		field1 = dtuple_get_nth_field(tuple1, i);
		field2 = dtuple_get_nth_field(tuple2, i);

		if (0 != cmp_dfield_dfield(field1, field2)) {

			return(FALSE);
		}
	}

	return(TRUE);
}
/***************************************************************//**
Searches an index record.
@return whether the record was found or buffered */
UNIV_INTERN
enum row_search_result
row_search_index_entry(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	const dtuple_t*	entry,	/*!< in: index entry */
	ulint		mode,	/*!< in: BTR_MODIFY_LEAF, ... */
	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor, which must
				be closed by the caller */
	mtr_t*		mtr)	/*!< in: mtr */
{
	ulint	n_fields;
	ulint	low_match;
	rec_t*	rec;

	ut_ad(dtuple_check_typed(entry));

	btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);

	switch (btr_pcur_get_btr_cur(pcur)->flag) {
	case BTR_CUR_DELETE_REF:
		ut_a(mode & BTR_DELETE);
		return(ROW_NOT_DELETED_REF);

	case BTR_CUR_DEL_MARK_IBUF:
	case BTR_CUR_DELETE_IBUF:
	case BTR_CUR_INSERT_TO_IBUF:
		return(ROW_BUFFERED);

	case BTR_CUR_HASH:
	case BTR_CUR_HASH_FAIL:
	case BTR_CUR_BINARY:
		break;
	}

	low_match = btr_pcur_get_low_match(pcur);

	rec = btr_pcur_get_rec(pcur);
	n_fields = dtuple_get_n_fields(entry);

	if (page_rec_is_infimum(rec)) {

		return(ROW_NOT_FOUND);
	} else if (low_match != n_fields) {

		return(ROW_NOT_FOUND);
	}

	return(ROW_FOUND);
}
ibool
dtuple_check_typed_no_assert(
/*=========================*/
				/* out: TRUE if ok */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		i;
	char		err_buf[1000];

	if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) {
		fprintf(stderr,
			"InnoDB: Error: index entry has %lu fields\n",
			dtuple_get_n_fields(tuple));

		dtuple_sprintf(err_buf, 900, tuple);
		fprintf(stderr, "InnoDB: Tuple contents: %s\n", err_buf);

		return(FALSE);
	}

	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {

		field = dtuple_get_nth_field(tuple, i);

		if (!dfield_check_typed_no_assert(field)) {

			dtuple_sprintf(err_buf, 900, tuple);
			fprintf(stderr,
				"InnoDB: Tuple contents: %s\n", err_buf);

			return(FALSE);
		}
	}

	return(TRUE);
}
/************************************************************//**
Compare two data tuples, respecting the collation of character fields.
@return 1, 0, -1 if tuple1 is greater, equal, less, respectively,
than tuple2 */
UNIV_INTERN
int
dtuple_coll_cmp(
/*============*/
	const dtuple_t*	tuple1,	/*!< in: tuple 1 */
	const dtuple_t*	tuple2)	/*!< in: tuple 2 */
{
	ulint	n_fields;
	ulint	i;

	ut_ad(tuple1 && tuple2);
	ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N);
	ut_ad(dtuple_check_typed(tuple1));
	ut_ad(dtuple_check_typed(tuple2));

	n_fields = dtuple_get_n_fields(tuple1);

	if (n_fields != dtuple_get_n_fields(tuple2)) {

		return(n_fields < dtuple_get_n_fields(tuple2) ? -1 : 1);
	}

	for (i = 0; i < n_fields; i++) {
		int		cmp;
		const dfield_t*	field1	= dtuple_get_nth_field(tuple1, i);
		const dfield_t*	field2	= dtuple_get_nth_field(tuple2, i);

		cmp = cmp_dfield_dfield(field1, field2);

		if (cmp) {
			return(cmp);
		}
	}

	return(0);
}
void
row_build_row_ref_from_row(
/*=======================*/
	dtuple_t*	ref,	/* in/out: row reference built; see the
				NOTE below! ref must have the right number
				of fields! */
	dict_table_t*	table,	/* in: table */
	dtuple_t*	row)	/* in: row
				NOTE: the data fields in ref will point
				directly into data of this row */
{
	dict_index_t*	clust_index;
	dict_field_t*	field;
	dfield_t*	dfield;
	dfield_t*	dfield2;
	dict_col_t*	col;
	ulint		ref_len;
	ulint		i;

	ut_ad(ref && table && row);

	clust_index = dict_table_get_first_index(table);

	ref_len = dict_index_get_n_unique(clust_index);

	ut_ad(ref_len == dtuple_get_n_fields(ref));

	for (i = 0; i < ref_len; i++) {
		dfield = dtuple_get_nth_field(ref, i);

		field = dict_index_get_nth_field(clust_index, i);

		col = dict_field_get_col(field);

		dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col));

		dfield_copy(dfield, dfield2);

		if (field->prefix_len > 0
		    && dfield->len != UNIV_SQL_NULL
		    && dfield->len > field->prefix_len) {

			dfield->len = field->prefix_len;
		}
	}

	ut_ad(dtuple_check_typed(ref));
}
/**********************************************************//**
Validates the consistency of a tuple which must be complete,
i.e., all fields must have been set.
@return TRUE if ok */
UNIV_INTERN
ibool
dtuple_validate(
/*============*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	const dfield_t*	field;
	ulint		n_fields;
	ulint		len;
	ulint		i;

	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = dtuple_get_n_fields(tuple);

	/* We dereference all the data of each field to test
	for memory traps */

	for (i = 0; i < n_fields; i++) {
		field = dtuple_get_nth_field(tuple, i);
		len = dfield_get_len(field);

		if (!dfield_is_null(field)) {

			const byte*	data = dfield_get_data(field);
#ifndef UNIV_DEBUG_VALGRIND
			ulint		j;

			for (j = 0; j < len; j++) {

				data_dummy += *data; /* fool the compiler not
						     to optimize out this
						     code */
				data++;
			}
#endif /* !UNIV_DEBUG_VALGRIND */

			UNIV_MEM_ASSERT_RW(data, len);
		}
	}

	ut_a(dtuple_check_typed(tuple));

	return(TRUE);
}
ibool
dtuple_check_typed(
/*===============*/
				/* out: TRUE if ok */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		i;

	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {

		field = dtuple_get_nth_field(tuple, i);

		ut_a(dfield_check_typed(field));
	}

	return(TRUE);
}
/**********************************************************//**
Checks that a data tuple is typed. Asserts an error if not.
@return TRUE if ok */
UNIV_INTERN
ibool
dtuple_check_typed(
/*===============*/
	const dtuple_t*	tuple)	/*!< in: tuple */
{
	const dfield_t*	field;
	ulint		i;

	for (i = 0; i < dtuple_get_n_fields(tuple); i++) {

		field = dtuple_get_nth_field(tuple, i);

		ut_a(dfield_check_typed(field));
	}

	return(TRUE);
}
ulint
dtuple_sprintf(
/*===========*/
				/* out: printed length in bytes */
	char*		buf,	/* in: print buffer */
	ulint		buf_len,/* in: buf length in bytes */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	ulint		n_fields;
	ulint		len;
	ulint		i;

	len = 0;

	n_fields = dtuple_get_n_fields(tuple);

	for (i = 0; i < n_fields; i++) {
		if (len + 30 > buf_len) {

			return(len);
		}

		len += sprintf(buf + len, " %lu:", i);

		field = dtuple_get_nth_field(tuple, i);

		if (field->len != UNIV_SQL_NULL) {
			if (5 * field->len + len + 30 > buf_len) {

				return(len);
			}

			len += ut_sprintf_buf(buf + len, field->data,
					      field->len);
		} else {
			len += sprintf(buf + len, " SQL NULL");
		}

		len += sprintf(buf + len, ";");
	}

	return(len);
}
ibool
dtuple_validate(
/*============*/
				/* out: TRUE if ok */
	dtuple_t*	tuple)	/* in: tuple */
{
	dfield_t*	field;
	byte*		data;
	ulint		n_fields;
	ulint		len;
	ulint		i;
	ulint		j;

	ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);

	n_fields = dtuple_get_n_fields(tuple);

	/* We dereference all the data of each field to test
	for memory traps */

	for (i = 0; i < n_fields; i++) {
		field = dtuple_get_nth_field(tuple, i);
		len = dfield_get_len(field);

		if (len != UNIV_SQL_NULL) {

			data = field->data;

			for (j = 0; j < len; j++) {

				data_dummy += *data; /* fool the compiler not
						     to optimize out this
						     code */
				data++;
			}
		}
	}

	ut_a(dtuple_check_typed(tuple));

	return(TRUE);
}
ibool
row_search_index_entry(
/*===================*/
				/* out: TRUE if found */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint		mode,	/* in: BTR_MODIFY_LEAF, ... */
	btr_pcur_t*	pcur,	/* in/out: persistent cursor, which must
				be closed by the caller */
	mtr_t*		mtr)	/* in: mtr */
{
	ulint	n_fields;
	ulint	low_match;
	page_t*	page;
	rec_t*	rec;

	ut_ad(dtuple_check_typed(entry));

	btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr);
	low_match = btr_pcur_get_low_match(pcur);

	rec = btr_pcur_get_rec(pcur);
	page = buf_frame_align(rec);

	n_fields = dtuple_get_n_fields(entry);

	if (rec == page_get_infimum_rec(page)) {

		return(FALSE);
	}

	if (low_match != n_fields) {
		/* Not found */

		return(FALSE);
	}

	return(TRUE);
}
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
search the clustered index record. */
UNIV_INTERN
void
row_build_row_ref_in_tuple(
/*=======================*/
	dtuple_t*		ref,	/*!< in/out: row reference built;
					see the NOTE below! */
	const rec_t*		rec,	/*!< in: record in the index;
					NOTE: the data fields in ref
					will point directly into this
					record, therefore, the buffer
					page of this record must be at
					least s-latched and the latch
					held as long as the row
					reference is used! */
	const dict_index_t*	index,	/*!< in: secondary index */
	ulint*			offsets,/*!< in: rec_get_offsets(rec, index)
					or NULL */
	trx_t*			trx)	/*!< in: transaction */
{
	const dict_index_t*	clust_index;
	dfield_t*		dfield;
	const byte*		field;
	ulint			len;
	ulint			ref_len;
	ulint			pos;
	ulint			clust_col_prefix_len;
	ulint			i;
	mem_heap_t*		heap		= NULL;
	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
	rec_offs_init(offsets_);

	ut_a(ref);
	ut_a(index);
	ut_a(rec);
	ut_ad(!dict_index_is_clust(index));

	if (UNIV_UNLIKELY(!index->table)) {
		fputs("InnoDB: table ", stderr);
notfound:
		ut_print_name(stderr, trx, TRUE, index->table_name);
		fputs(" for index ", stderr);
		ut_print_name(stderr, trx, FALSE, index->name);
		fputs(" not found\n", stderr);
		ut_error;
	}

	clust_index = dict_table_get_first_index(index->table);

	if (UNIV_UNLIKELY(!clust_index)) {
		fputs("InnoDB: clust index for table ", stderr);
		goto notfound;
	}

	if (!offsets) {
		offsets = rec_get_offsets(rec, index, offsets_,
					  ULINT_UNDEFINED, &heap);
	} else {
		ut_ad(rec_offs_validate(rec, index, offsets));
	}

	/* Secondary indexes must not contain externally stored columns. */
	ut_ad(!rec_offs_any_extern(offsets));
	ref_len = dict_index_get_n_unique(clust_index);

	ut_ad(ref_len == dtuple_get_n_fields(ref));

	dict_index_copy_types(ref, clust_index, ref_len);

	for (i = 0; i < ref_len; i++) {
		dfield = dtuple_get_nth_field(ref, i);

		pos = dict_index_get_nth_field_pos(index, clust_index, i);

		ut_a(pos != ULINT_UNDEFINED);

		field = rec_get_nth_field(rec, offsets, pos, &len);

		dfield_set_data(dfield, field, len);

		/* If the primary key contains a column prefix, then the
		secondary index may contain a longer prefix of the same
		column, or the full column, and we must adjust the length
		accordingly. */

		clust_col_prefix_len = dict_index_get_nth_field(
			clust_index, i)->prefix_len;

		if (clust_col_prefix_len > 0) {
			if (len != UNIV_SQL_NULL) {

				const dtype_t*	dtype
					= dfield_get_type(dfield);

				dfield_set_len(dfield,
					       dtype_get_at_most_n_mbchars(
						       dtype->prtype,
						       dtype->mbminlen,
						       dtype->mbmaxlen,
						       clust_col_prefix_len,
						       len, (char*) field));
			}
		}
	}

	ut_ad(dtuple_check_typed(ref));
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}
ulint
dtuple_get_n_fields_noninline(
	dtuple_t*	tuple)	/* in: tuple */
{
	return(dtuple_get_n_fields(tuple));
}
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
				/* out, own: created big record vector,
				NULL if we are not able to shorten the
				entry enough, i.e., if there are too many
				short fields in entry */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint*		ext_vec,/* in: array of externally stored fields,
				or NULL: if a field already is externally
				stored, then we cannot move it to the vector
				this function returns */
	ulint		n_ext_vec)/* in: number of elements in ext_vec */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	ulint		size;
	ulint		n_fields;
	ulint		longest;
	ulint		longest_i	= ULINT_MAX;
	ibool		is_externally_stored;
	ulint		i;
	ulint		j;

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	the longest field whose type is DATA_BLOB */

	n_fields = 0;

	while (rec_get_converted_size(index, entry)
	       >= ut_min(page_get_free_space_of_empty(
				 index->table->comp) / 2,
			 REC_MAX_DATA_SIZE)) {

		longest = 0;
		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {

			/* Skip over fields which already are externally
			stored */

			is_externally_stored = FALSE;

			if (ext_vec) {
				for (j = 0; j < n_ext_vec; j++) {
					if (ext_vec[j] == i) {
						is_externally_stored = TRUE;
					}
				}
			}

			if (!is_externally_stored) {

				dfield = dtuple_get_nth_field(entry, i);

				if (dfield->len != UNIV_SQL_NULL
				    && dfield->len > longest) {

					longest = dfield->len;

					longest_i = i;
				}
			}
		}

		/* We do not externally store fields which are smaller than
		DICT_MAX_INDEX_COL_LEN */

		ut_a(DICT_MAX_INDEX_COL_LEN > REC_1BYTE_OFFS_LIMIT);

		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10
		    + DICT_MAX_INDEX_COL_LEN) {

			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector;
		we do not let data size of the remaining entry drop below
		128 which is the limit for the 2-byte offset storage
		format in a physical record. This we accomplish by storing
		128 bytes of data in entry itself, and only the remaining
		part to big rec vec.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		vector->fields[n_fields].field_no = longest_i;

		ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN);

		vector->fields[n_fields].len = dfield->len
			- DICT_MAX_INDEX_COL_LEN;

		vector->fields[n_fields].data = mem_heap_alloc(
			heap, vector->fields[n_fields].len);

		/* Copy data (from the end of field) to big rec vector */

		ut_memcpy(vector->fields[n_fields].data,
			  ((byte*)dfield->data) + dfield->len
			  - vector->fields[n_fields].len,
			  vector->fields[n_fields].len);
		dfield->len = dfield->len - vector->fields[n_fields].len
			+ BTR_EXTERN_FIELD_REF_SIZE;

		/* Set the extern field reference in dfield to zero */

		memset(((byte*)dfield->data) + dfield->len
		       - BTR_EXTERN_FIELD_REF_SIZE,
		       0, BTR_EXTERN_FIELD_REF_SIZE);
		n_fields++;
	}

	vector->n_fields = n_fields;
	return(vector);
}
rec_t*
rec_convert_dtuple_to_rec_low(
/*==========================*/
					/* out: pointer to the origin of
					physical record */
	byte*		destination,	/* in: start address of the physical
					record */
	dtuple_t*	dtuple,		/* in: data tuple */
	ulint		data_size)	/* in: data size of dtuple */
{
	dfield_t*	field;
	ulint		n_fields;
	rec_t*		rec;
	ulint		end_offset;
	ulint		ored_offset;
	byte*		data;
	ulint		len;
	ulint		i;

	ut_ad(destination && dtuple);
	ut_ad(dtuple_validate(dtuple));
	ut_ad(dtuple_check_typed(dtuple));
	ut_ad(dtuple_get_data_size(dtuple) == data_size);

	n_fields = dtuple_get_n_fields(dtuple);

	ut_ad(n_fields > 0);

	/* Calculate the offset of the origin in the physical record */

	rec = destination + rec_get_converted_extra_size(data_size, n_fields);

	/* Store the number of fields */
	rec_set_n_fields(rec, n_fields);

	/* Set the info bits of the record */
	rec_set_info_bits(rec, dtuple_get_info_bits(dtuple));

	/* Store the data and the offsets */

	end_offset = 0;

	if (data_size <= REC_1BYTE_OFFS_LIMIT) {

		rec_set_1byte_offs_flag(rec, TRUE);

		for (i = 0; i < n_fields; i++) {

			field = dtuple_get_nth_field(dtuple, i);

			data = dfield_get_data(field);
			len = dfield_get_len(field);

			if (len == UNIV_SQL_NULL) {
				len = dtype_get_sql_null_size(
					dfield_get_type(field));
				data_write_sql_null(rec + end_offset, len);

				end_offset += len;
				ored_offset = end_offset
					| REC_1BYTE_SQL_NULL_MASK;
			} else {
				/* If the data is not SQL null, store it */
				ut_memcpy(rec + end_offset, data, len);

				end_offset += len;
				ored_offset = end_offset;
			}

			rec_1_set_field_end_info(rec, i, ored_offset);
		}
	} else {
		rec_set_1byte_offs_flag(rec, FALSE);

		for (i = 0; i < n_fields; i++) {

			field = dtuple_get_nth_field(dtuple, i);

			data = dfield_get_data(field);
			len = dfield_get_len(field);

			if (len == UNIV_SQL_NULL) {
				len = dtype_get_sql_null_size(
					dfield_get_type(field));
				data_write_sql_null(rec + end_offset, len);

				end_offset += len;
				ored_offset = end_offset
					| REC_2BYTE_SQL_NULL_MASK;
			} else {
				/* If the data is not SQL null, store it */
				ut_memcpy(rec + end_offset, data, len);

				end_offset += len;
				ored_offset = end_offset;
			}

			rec_2_set_field_end_info(rec, i, ored_offset);
		}
	}

	ut_ad(rec_validate(rec));

	return(rec);
}
/**************************************************************//**
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.
@return own: created big record vector, NULL if we are not able to
shorten the entry enough, i.e., if there are too many fixed-length or
short fields in entry or the index is clustered */
UNIV_INTERN
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
	dict_index_t*	index,	/*!< in: index */
	dtuple_t*	entry,	/*!< in/out: index entry */
	ulint*		n_ext)	/*!< in/out: number of
				externally stored columns */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	dict_field_t*	ifield;
	ulint		size;
	ulint		n_fields;
	ulint		local_len;
	ulint		local_prefix_len;

	if (UNIV_UNLIKELY(!dict_index_is_clust(index))) {
		return(NULL);
	}

	if (dict_table_get_format(index->table) < DICT_TF_FORMAT_ZIP) {
		/* up to MySQL 5.1: store a 768-byte prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE + DICT_MAX_INDEX_COL_LEN;
	} else {
		/* new-format table: do not store any BLOB prefix locally */
		local_len = BTR_EXTERN_FIELD_REF_SIZE;
	}

	ut_a(dtuple_check_typed_no_assert(entry));

	size = rec_get_converted_size(index, entry, *n_ext);

	if (UNIV_UNLIKELY(size > 1000000000)) {
		fprintf(stderr,
			"InnoDB: Warning: tuple size very big: %lu\n",
			(ulong) size);
		fputs("InnoDB: Tuple contents: ", stderr);
		dtuple_print(stderr, entry);
		putc('\n', stderr);
	}

	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
			       * sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	a variable-length field that yields the biggest savings when
	stored externally */

	n_fields = 0;

	while (page_zip_rec_needs_ext(rec_get_converted_size(index, entry,
							     *n_ext),
				      dict_table_is_comp(index->table),
				      dict_index_get_n_fields(index),
				      dict_table_zip_size(index->table))) {
		ulint			i;
		ulint			longest		= 0;
		ulint			longest_i	= ULINT_MAX;
		byte*			data;
		big_rec_field_t*	b;

		for (i = dict_index_get_n_unique_in_tree(index);
		     i < dtuple_get_n_fields(entry); i++) {
			ulint	savings;

			dfield = dtuple_get_nth_field(entry, i);
			ifield = dict_index_get_nth_field(index, i);

			/* Skip fixed-length, NULL, externally stored,
			or short columns */

			if (ifield->fixed_len
			    || dfield_is_null(dfield)
			    || dfield_is_ext(dfield)
			    || dfield_get_len(dfield) <= local_len
			    || dfield_get_len(dfield)
			    <= BTR_EXTERN_FIELD_REF_SIZE * 2) {
				goto skip_field;
			}

			savings = dfield_get_len(dfield) - local_len;

			/* Check that there would be savings */
			if (longest >= savings) {
				goto skip_field;
			}

			longest_i = i;
			longest = savings;

skip_field:
			continue;
		}

		if (!longest) {
			/* Cannot shorten more */

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector.

		We store the first bytes locally to the record. Then
		we can calculate all ordering fields in all indexes
		from locally stored data. */

		dfield = dtuple_get_nth_field(entry, longest_i);
		ifield = dict_index_get_nth_field(index, longest_i);
		local_prefix_len = local_len - BTR_EXTERN_FIELD_REF_SIZE;

		b = &vector->fields[n_fields];
		b->field_no = longest_i;
		b->len = dfield_get_len(dfield) - local_prefix_len;
		b->data = (char*) dfield_get_data(dfield) + local_prefix_len;

		/* Allocate the locally stored part of the column. */
		data = mem_heap_alloc(heap, local_len);

		/* Copy the local prefix. */
		memcpy(data, dfield_get_data(dfield), local_prefix_len);
		/* Clear the extern field reference (BLOB pointer). */
		memset(data + local_prefix_len, 0, BTR_EXTERN_FIELD_REF_SIZE);
#if 0
		/* The following would fail the Valgrind checks in
		page_cur_insert_rec_low() and page_cur_insert_rec_zip().
		The BLOB pointers in the record will be initialized after
		the record and the BLOBs have been written. */
		UNIV_MEM_ALLOC(data + local_prefix_len,
			       BTR_EXTERN_FIELD_REF_SIZE);
#endif

		dfield_set_data(dfield, data, local_len);
		dfield_set_ext(dfield);

		n_fields++;
		(*n_ext)++;
		ut_ad(n_fields < dtuple_get_n_fields(entry));
	}

	vector->n_fields = n_fields;
	return(vector);
}
void
row_build_row_ref_in_tuple(
/*=======================*/
	dtuple_t*	ref,	/* in/out: row reference built; see the
				NOTE below! */
	dict_index_t*	index,	/* in: index */
	rec_t*		rec)	/* in: record in the index;
				NOTE: the data fields in ref will point
				directly into this record, therefore,
				the buffer page of this record must be
				at least s-latched and the latch held
				as long as the row reference is used! */
{
	dict_table_t*	table;
	dict_index_t*	clust_index;
	dfield_t*	dfield;
	byte*		field;
	ulint		len;
	ulint		ref_len;
	ulint		pos;
	ulint		i;

	ut_a(ref && index && rec);

	table = index->table;

	if (!table) {
		fprintf(stderr, "InnoDB: table %s for index %s not found\n",
			index->table_name, index->name);
		ut_a(0);
	}

	clust_index = dict_table_get_first_index(table);

	if (!clust_index) {
		fprintf(stderr,
			"InnoDB: clust index for table %s for index %s not found\n",
			index->table_name, index->name);
		ut_a(0);
	}

	ref_len = dict_index_get_n_unique(clust_index);

	ut_ad(ref_len == dtuple_get_n_fields(ref));

	dict_index_copy_types(ref, clust_index, ref_len);

	for (i = 0; i < ref_len; i++) {
		dfield = dtuple_get_nth_field(ref, i);

		pos = dict_index_get_nth_field_pos(index, clust_index, i);

		ut_a(pos != ULINT_UNDEFINED);

		field = rec_get_nth_field(rec, pos, &len);

		dfield_set_data(dfield, field, len);
	}

	ut_ad(dtuple_check_typed(ref));
}
/*****************************************************************//**
Finds out if an active transaction has inserted or modified a secondary
index record. NOTE: the kernel mutex is temporarily released in this
function!
@return NULL if committed, else the active transaction */
UNIV_INTERN
trx_t*
row_vers_impl_x_locked_off_kernel(
/*==============================*/
	const rec_t*	rec,	/*!< in: record in a secondary index */
	dict_index_t*	index,	/*!< in: the secondary index */
	const ulint*	offsets)/*!< in: rec_get_offsets(rec, index) */
{
	dict_index_t*	clust_index;
	rec_t*		clust_rec;
	ulint*		clust_offsets;
	rec_t*		version;
	trx_id_t	trx_id;
	mem_heap_t*	heap;
	mem_heap_t*	heap2;
	dtuple_t*	row;
	dtuple_t*	entry	= NULL; /* assignment to eliminate compiler
					warning */
	trx_t*		trx;
	ulint		rec_del;
#ifdef UNIV_DEBUG
	ulint		err;
#endif /* UNIV_DEBUG */
	mtr_t		mtr;
	ulint		comp;

	ut_ad(mutex_own(&kernel_mutex));
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	mutex_exit(&kernel_mutex);

	mtr_start(&mtr);

	/* Search for the clustered index record: this is a time-consuming
	operation: therefore we release the kernel mutex; also, the release
	is required by the latching order convention. The latch on the
	clustered index locks the top of the stack of versions. We also
	reserve purge_latch to lock the bottom of the version stack. */

	clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index,
				      &clust_index, &mtr);
	if (!clust_rec) {
		/* In a rare case it is possible that no clust rec is found
		for a secondary index record: if in row0umod.c
		row_undo_mod_remove_clust_low() we have already removed the
		clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case there cannot be
		any implicit lock on the secondary index record, because
		an active transaction which has modified the secondary index
		record has also modified the clustered index record. And in
		a rollback we always undo the modifications to secondary index
		records before the clustered index record. */

		mutex_enter(&kernel_mutex);
		mtr_commit(&mtr);

		return(NULL);
	}

	heap = mem_heap_create(1024);
	clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL,
					ULINT_UNDEFINED, &heap);
	trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets);

	mtr_s_lock(&(purge_sys->latch), &mtr);

	mutex_enter(&kernel_mutex);

	trx = NULL;
	if (!trx_is_active(trx_id)) {
		/* The transaction that modified or inserted clust_rec is no
		longer active: no implicit lock on rec */
		goto exit_func;
	}

	if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index,
				      clust_offsets, TRUE)) {
		/* Corruption noticed: try to avoid a crash by returning */
		goto exit_func;
	}

	comp = page_rec_is_comp(rec);
	ut_ad(index->table == clust_index->table);
	ut_ad(!!comp == dict_table_is_comp(index->table));
	ut_ad(!comp == !page_rec_is_comp(clust_rec));

	/* We look up if some earlier version, which was modified by the
	trx_id transaction, of the clustered index record would require rec
	to be in a different state (delete marked or unmarked, or have
	different field values, or not existing). If there is such a version,
	then rec was modified by the trx_id transaction, and it has an
	implicit x-lock on rec. Note that if clust_rec itself would require
	rec to be in a different state, then the trx_id transaction has not
	yet had time to modify rec, and does not necessarily have an implicit
	x-lock on rec. */

	rec_del = rec_get_deleted_flag(rec, comp);
	trx = NULL;

	version = clust_rec;

	for (;;) {
		rec_t*		prev_version;
		ulint		vers_del;
		row_ext_t*	ext;
		trx_id_t	prev_trx_id;

		mutex_exit(&kernel_mutex);

		/* While we retrieve an earlier version of clust_rec, we
		release the kernel mutex, because it may take time to access
		the disk. After the release, we have to check if the trx_id
		transaction is still active. We keep the semaphore in mtr on
		the clust_rec page, so that no other transaction can update
		it and get an implicit x-lock on rec. */

		heap2 = heap;
		heap = mem_heap_create(1024);
#ifdef UNIV_DEBUG
		err =
#endif /* UNIV_DEBUG */
		trx_undo_prev_version_build(clust_rec, &mtr, version,
					    clust_index, clust_offsets,
					    heap, &prev_version);
		mem_heap_free(heap2); /* free version and clust_offsets */

		if (prev_version == NULL) {
			mutex_enter(&kernel_mutex);

			if (!trx_is_active(trx_id)) {
				/* Transaction no longer active: no
				implicit x-lock */

				break;
			}

			/* If the transaction is still active,
			clust_rec must be a fresh insert, because no
			previous version was found. */
			ut_ad(err == DB_SUCCESS);

			/* It was a freshly inserted version: there is an
			implicit x-lock on rec */

			trx = trx_get_on_id(trx_id);

			break;
		}

		clust_offsets = rec_get_offsets(prev_version, clust_index,
						NULL, ULINT_UNDEFINED, &heap);

		vers_del = rec_get_deleted_flag(prev_version, comp);
		prev_trx_id = row_get_rec_trx_id(prev_version, clust_index,
						 clust_offsets);

		/* The stack of versions is locked by mtr.
		Thus, it is safe to fetch the prefixes for
		externally stored columns. */
		row = row_build(ROW_COPY_POINTERS, clust_index, prev_version,
				clust_offsets, NULL, &ext, heap);
		entry = row_build_index_entry(row, ext, index, heap);
		/* entry may be NULL if a record was inserted in place
		of a deleted record, and the BLOB pointers of the new
		record were not initialized yet.  But in that case,
		prev_version should be NULL. */
		ut_a(entry);

		mutex_enter(&kernel_mutex);

		if (!trx_is_active(trx_id)) {
			/* Transaction no longer active: no implicit
			x-lock */

			break;
		}

		/* If we get here, we know that the trx_id transaction is
		still active and it has modified prev_version. Let us check
		if prev_version would require rec to be in a different
		state. */

		/* The previous version of clust_rec must be
		accessible, because the transaction is still active
		and clust_rec was not a fresh insert. */
		ut_ad(err == DB_SUCCESS);

		/* We check if entry and rec are identified in the
		alphabetical ordering */
		if (0 == cmp_dtuple_rec(entry, rec, offsets)) {
			/* The delete marks of rec and prev_version should be
			equal for rec to be in the state required by
			prev_version */

			if (rec_del != vers_del) {
				trx = trx_get_on_id(trx_id);

				break;
			}

			/* It is possible that the row was updated so that the
			secondary index record remained the same in
			alphabetical ordering, but the field values changed
			still. For example, 'abc' -> 'ABC'. Check also that. */

			dtuple_set_types_binary(entry,
						dtuple_get_n_fields(entry));
			if (0 != cmp_dtuple_rec(entry, rec, offsets)) {

				trx = trx_get_on_id(trx_id);

				break;
			}
		} else if (!rec_del) {
			/* The delete mark should be set in rec for it to be
			in the state required by prev_version */

			trx = trx_get_on_id(trx_id);

			break;
		}

		if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) {
			/* The versions modified by the trx_id transaction end
			to prev_version: no implicit x-lock */

			break;
		}

		version = prev_version;
	}/* for (;;) */

exit_func:
	mtr_commit(&mtr);
	mem_heap_free(heap);

	return(trx);
}
upd_t*
row_upd_build_difference_binary(
/*============================*/
				/* out, own: update vector of differing
				fields, excluding roll ptr and trx id */
	dict_index_t*	index,	/* in: clustered index */
	dtuple_t*	entry,	/* in: entry to insert */
	ulint*		ext_vec,/* in: array containing field numbers of
				externally stored fields in entry, or NULL */
	ulint		n_ext_vec,/* in: number of fields in ext_vec */
	rec_t*		rec,	/* in: clustered index record */
	mem_heap_t*	heap)	/* in: memory heap from which allocated */
{
	upd_field_t*	upd_field;
	dfield_t*	dfield;
	byte*		data;
	ulint		len;
	upd_t*		update;
	ulint		n_diff;
	ulint		roll_ptr_pos;
	ulint		trx_id_pos;
	ulint		i;

	/* This function is used only for a clustered index */
	ut_a(index->type & DICT_CLUSTERED);

	update = upd_create(dtuple_get_n_fields(entry), heap);

	n_diff = 0;

	roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR);
	trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);

	for (i = 0; i < dtuple_get_n_fields(entry); i++) {

		data = rec_get_nth_field(rec, i, &len);

		dfield = dtuple_get_nth_field(entry, i);

		/* NOTE: we compare the fields as binary strings!
		(No collation) */

		if (i == trx_id_pos || i == roll_ptr_pos) {

			goto skip_compare;
		}

		if (rec_get_nth_field_extern_bit(rec, i)
		    != upd_ext_vec_contains(ext_vec, n_ext_vec, i)
		    || !dfield_data_is_binary_equal(dfield, len, data)) {

			upd_field = upd_get_nth_field(update, n_diff);

			dfield_copy(&(upd_field->new_val), dfield);

			upd_field_set_field_no(upd_field, i, index);

			if (upd_ext_vec_contains(ext_vec, n_ext_vec, i)) {
				upd_field->extern_storage = TRUE;
			} else {
				upd_field->extern_storage = FALSE;
			}

			n_diff++;
		}
skip_compare:
		;
	}

	update->n_fields = n_diff;

	return(update);
}