Skip to content

Commit 40e213f

Browse files
committed
Merge branch 'merge-tokudb-5.6' into 10.0
2 parents b278c02 + b1a2031 commit 40e213f

File tree

118 files changed

+35042
-479
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

118 files changed

+35042
-479
lines changed

storage/tokudb/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
SET(TOKUDB_VERSION 5.6.36-82.0)
1+
SET(TOKUDB_VERSION 5.6.36-82.1)
22
# PerconaFT only supports x86-64 and cmake-2.8.9+
33
IF(CMAKE_VERSION VERSION_LESS "2.8.9")
44
MESSAGE(STATUS "CMake 2.8.9 or higher is required by TokuDB")

storage/tokudb/PerconaFT/CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,16 @@ project(TokuDB)
99
set(CMAKE_SHARED_LIBRARY_LINK_C_FLAGS "")
1010
set(CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
1111

12+
# detect when we are being built as a subproject
13+
if (DEFINED MYSQL_PROJECT_NAME_DOCSTRING)
14+
add_definitions( -DMYSQL_TOKUDB_ENGINE=1)
15+
if ((CMAKE_BUILD_TYPE MATCHES "Debug") AND
16+
(CMAKE_CXX_FLAGS_DEBUG MATCHES " -DENABLED_DEBUG_SYNC"))
17+
include_directories(${CMAKE_SOURCE_DIR}/include)
18+
include_directories(${CMAKE_SOURCE_DIR}/sql)
19+
endif ()
20+
endif ()
21+
1222
## Versions of gcc >= 4.9.0 require special version of 'ar' and 'ranlib' for
1323
## link-time optimizations to work properly.
1424
##

storage/tokudb/PerconaFT/buildheader/make_tdb.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -428,6 +428,7 @@ static void print_db_env_struct (void) {
428428
"int (*dirtool_attach)(DB_ENV *, DB_TXN *, const char *, const char *)",
429429
"int (*dirtool_detach)(DB_ENV *, DB_TXN *, const char *)",
430430
"int (*dirtool_move)(DB_ENV *, DB_TXN *, const char *, const char *)",
431+
"void (*kill_waiter)(DB_ENV *, void *extra)",
431432
NULL};
432433

433434
sort_and_dump_fields("db_env", true, extra);
@@ -548,8 +549,8 @@ static void print_db_txn_struct (void) {
548549
"int (*abort_with_progress)(DB_TXN*, TXN_PROGRESS_POLL_FUNCTION, void*)",
549550
"int (*xa_prepare) (DB_TXN*, TOKU_XA_XID *, uint32_t flags)",
550551
"uint64_t (*id64) (DB_TXN*)",
551-
"void (*set_client_id)(DB_TXN *, uint64_t client_id)",
552-
"uint64_t (*get_client_id)(DB_TXN *)",
552+
"void (*set_client_id)(DB_TXN *, uint64_t client_id, void *client_extra)",
553+
"void (*get_client_id)(DB_TXN *, uint64_t *client_id, void **client_extra)",
553554
"bool (*is_prepared)(DB_TXN *)",
554555
"DB_TXN *(*get_child)(DB_TXN *)",
555556
"uint64_t (*get_start_time)(DB_TXN *)",

storage/tokudb/PerconaFT/cmake_modules/TokuThirdParty.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,9 @@ ExternalProject_Add(build_snappy
123123
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
124124
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
125125
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
126+
-DCMAKE_AR=${CMAKE_AR}
127+
-DCMAKE_NM=${CMAKE_NM}
128+
-DCMAKE_RANLIB=${CMAKE_RANLIB}
126129
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
127130
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
128131
${USE_PROJECT_CMAKE_MODULE_PATH}

storage/tokudb/PerconaFT/ft/cachetable/cachetable.cc

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -464,7 +464,10 @@ int toku_cachetable_openf (CACHEFILE *cfptr, CACHETABLE ct, const char *fname_in
464464

465465
char *
466466
toku_cachefile_fname_in_env (CACHEFILE cf) {
467-
return cf->fname_in_env;
467+
if (cf) {
468+
return cf->fname_in_env;
469+
}
470+
return nullptr;
468471
}
469472

470473
void toku_cachefile_set_fname_in_env(CACHEFILE cf, char *new_fname_in_env) {
@@ -2890,6 +2893,10 @@ toku_cachefile_get_cachetable(CACHEFILE cf) {
28902893
return cf->cachetable;
28912894
}
28922895

2896+
CACHEFILE toku_pair_get_cachefile(PAIR pair) {
2897+
return pair->cachefile;
2898+
}
2899+
28932900
//Only called by ft_end_checkpoint
28942901
//Must have access to cf->fd (must be protected)
28952902
void toku_cachefile_fsync(CACHEFILE cf) {

storage/tokudb/PerconaFT/ft/cachetable/cachetable.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,6 +297,9 @@ void *toku_cachefile_get_userdata(CACHEFILE);
297297
CACHETABLE toku_cachefile_get_cachetable(CACHEFILE cf);
298298
// Effect: Get the cachetable.
299299

300+
CACHEFILE toku_pair_get_cachefile(PAIR);
301+
// Effect: Get the cachefile of the pair
302+
300303
void toku_cachetable_swap_pair_values(PAIR old_pair, PAIR new_pair);
301304
// Effect: Swaps the value_data of old_pair and new_pair.
302305
// Requires: both old_pair and new_pair to be pinned with write locks.

storage/tokudb/PerconaFT/ft/ft-ops.cc

Lines changed: 77 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -651,8 +651,12 @@ void toku_ftnode_clone_callback(void *value_data,
651651
// set new pair attr if necessary
652652
if (node->height == 0) {
653653
*new_attr = make_ftnode_pair_attr(node);
654-
node->logical_rows_delta = 0;
655-
cloned_node->logical_rows_delta = 0;
654+
for (int i = 0; i < node->n_children; i++) {
655+
if (BP_STATE(node, i) == PT_AVAIL) {
656+
BLB_LRD(node, i) = 0;
657+
BLB_LRD(cloned_node, i) = 0;
658+
}
659+
}
656660
} else {
657661
new_attr->is_valid = false;
658662
}
@@ -700,9 +704,26 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
700704
if (ftnode->height == 0) {
701705
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF, 1);
702706
FT_STATUS_INC(FT_FULL_EVICTIONS_LEAF_BYTES, node_size);
703-
if (!ftnode->dirty) {
704-
toku_ft_adjust_logical_row_count(
705-
ft, -ftnode->logical_rows_delta);
707+
708+
// A leaf node (height == 0) is being evicted (!keep_me) and is
709+
// not a checkpoint clone (!is_clone). This leaf node may have
710+
// had messages applied to satisfy a query, but was never
711+
// actually dirtied (!ftnode->dirty && !write_me). **Note that
712+
// if (write_me) would persist the node and clear the dirty
713+
// flag **. This message application may have updated the trees
714+
// logical row count. Since these message applications are not
715+
// persisted, we need undo the logical row count adjustments as
716+
// they may occur again in the future if/when the node is
717+
// re-read from disk for another query or change.
718+
if (!ftnode->dirty && !write_me) {
719+
int64_t lrc_delta = 0;
720+
for (int i = 0; i < ftnode->n_children; i++) {
721+
if (BP_STATE(ftnode, i) == PT_AVAIL) {
722+
lrc_delta -= BLB_LRD(ftnode, i);
723+
BLB_LRD(ftnode, i) = 0;
724+
}
725+
}
726+
toku_ft_adjust_logical_row_count(ft, lrc_delta);
706727
}
707728
} else {
708729
FT_STATUS_INC(FT_FULL_EVICTIONS_NONLEAF, 1);
@@ -711,17 +732,18 @@ void toku_ftnode_flush_callback(CACHEFILE UU(cachefile),
711732
toku_free(*disk_data);
712733
} else {
713734
if (ftnode->height == 0) {
735+
// No need to adjust logical row counts when flushing a clone
736+
// as they should have been zeroed out anyway when cloned.
737+
// Clones are 'copies' of work already done so doing it again
738+
// (adjusting row counts) would be redundant and leads to
739+
// inaccurate counts.
714740
for (int i = 0; i < ftnode->n_children; i++) {
715741
if (BP_STATE(ftnode, i) == PT_AVAIL) {
716742
BASEMENTNODE bn = BLB(ftnode, i);
717743
toku_ft_decrease_stats(&ft->in_memory_stats,
718744
bn->stat64_delta);
719745
}
720746
}
721-
if (!ftnode->dirty) {
722-
toku_ft_adjust_logical_row_count(
723-
ft, -ftnode->logical_rows_delta);
724-
}
725747
}
726748
}
727749
toku_ftnode_free(&ftnode);
@@ -748,24 +770,48 @@ toku_ft_status_update_pivot_fetch_reason(ftnode_fetch_extra *bfe)
748770
}
749771
}
750772

751-
int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNUM blocknum, uint32_t fullhash,
752-
void **ftnode_pv, void** disk_data, PAIR_ATTR *sizep, int *dirtyp, void *extraargs) {
773+
int toku_ftnode_fetch_callback(CACHEFILE UU(cachefile),
774+
PAIR p,
775+
int fd,
776+
BLOCKNUM blocknum,
777+
uint32_t fullhash,
778+
void **ftnode_pv,
779+
void **disk_data,
780+
PAIR_ATTR *sizep,
781+
int *dirtyp,
782+
void *extraargs) {
753783
assert(extraargs);
754-
assert(*ftnode_pv == NULL);
755-
FTNODE_DISK_DATA* ndd = (FTNODE_DISK_DATA*)disk_data;
784+
assert(*ftnode_pv == nullptr);
785+
FTNODE_DISK_DATA *ndd = (FTNODE_DISK_DATA *)disk_data;
756786
ftnode_fetch_extra *bfe = (ftnode_fetch_extra *)extraargs;
757-
FTNODE *node=(FTNODE*)ftnode_pv;
787+
FTNODE *node = (FTNODE *)ftnode_pv;
758788
// deserialize the node, must pass the bfe in because we cannot
759789
// evaluate what piece of the node is necessary until we get it at
760790
// least partially into memory
761-
int r = toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe);
791+
int r =
792+
toku_deserialize_ftnode_from(fd, blocknum, fullhash, node, ndd, bfe);
762793
if (r != 0) {
763794
if (r == TOKUDB_BAD_CHECKSUM) {
764-
fprintf(stderr,
765-
"Checksum failure while reading node in file %s.\n",
766-
toku_cachefile_fname_in_env(cachefile));
795+
fprintf(
796+
stderr,
797+
"%s:%d:toku_ftnode_fetch_callback - "
798+
"file[%s], blocknum[%ld], toku_deserialize_ftnode_from "
799+
"failed with a checksum error.\n",
800+
__FILE__,
801+
__LINE__,
802+
toku_cachefile_fname_in_env(cachefile),
803+
blocknum.b);
767804
} else {
768-
fprintf(stderr, "Error deserializing node, errno = %d", r);
805+
fprintf(
806+
stderr,
807+
"%s:%d:toku_ftnode_fetch_callback - "
808+
"file[%s], blocknum[%ld], toku_deserialize_ftnode_from "
809+
"failed with %d.\n",
810+
__FILE__,
811+
__LINE__,
812+
toku_cachefile_fname_in_env(cachefile),
813+
blocknum.b,
814+
r);
769815
}
770816
// make absolutely sure we crash before doing anything else.
771817
abort();
@@ -774,7 +820,8 @@ int toku_ftnode_fetch_callback (CACHEFILE UU(cachefile), PAIR p, int fd, BLOCKNU
774820
if (r == 0) {
775821
*sizep = make_ftnode_pair_attr(*node);
776822
(*node)->ct_pair = p;
777-
*dirtyp = (*node)->dirty; // deserialize could mark the node as dirty (presumably for upgrade)
823+
*dirtyp = (*node)->dirty; // deserialize could mark the node as dirty
824+
// (presumably for upgrade)
778825
}
779826
return r;
780827
}
@@ -947,6 +994,16 @@ int toku_ftnode_pe_callback(void *ftnode_pv,
947994
basements_to_destroy[num_basements_to_destroy++] = bn;
948995
toku_ft_decrease_stats(&ft->in_memory_stats,
949996
bn->stat64_delta);
997+
// A basement node is being partially evicted.
998+
// This basement node may have had messages applied to it to
999+
// satisfy a query, but was never actually dirtied.
1000+
// This message application may have updated the tree's
1001+
// logical row count. Since these message applications are
1002+
// not being persisted, we need to undo the logical row count
1003+
// adjustments as they may occur again in the future if/when
1004+
// the node is re-read from disk for another query or change.
1005+
toku_ft_adjust_logical_row_count(ft,
1006+
-bn->logical_rows_delta);
9501007
set_BNULL(node, i);
9511008
BP_STATE(node, i) = PT_ON_DISK;
9521009
num_partial_evictions++;

storage/tokudb/PerconaFT/ft/ft.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,8 @@ int toku_read_ft_and_store_in_cachefile (FT_HANDLE ft_handle, CACHEFILE cf, LSN
435435
}
436436

437437
int fd = toku_cachefile_get_fd(cf);
438-
int r = toku_deserialize_ft_from(fd, max_acceptable_lsn, &ft);
438+
const char *fn = toku_cachefile_fname_in_env(cf);
439+
int r = toku_deserialize_ft_from(fd, fn, max_acceptable_lsn, &ft);
439440
if (r == TOKUDB_BAD_CHECKSUM) {
440441
fprintf(stderr, "Checksum failure while reading header in file %s.\n", toku_cachefile_fname_in_env(cf));
441442
assert(false); // make absolutely sure we crash before doing anything else

storage/tokudb/PerconaFT/ft/node.cc

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ void toku_destroy_ftnode_internals(FTNODE node) {
9393
if (node->height > 0) {
9494
destroy_nonleaf_childinfo(BNC(node,i));
9595
} else {
96+
paranoid_invariant(BLB_LRD(node, i) == 0);
9697
destroy_basement_node(BLB(node, i));
9798
}
9899
} else if (BP_STATE(node,i) == PT_COMPRESSED) {
@@ -386,15 +387,15 @@ static void bnc_apply_messages_to_basement_node(
386387
const pivot_bounds &
387388
bounds, // contains pivot key bounds of this basement node
388389
txn_gc_info *gc_info,
389-
bool *msgs_applied,
390-
int64_t* logical_rows_delta) {
390+
bool *msgs_applied) {
391391
int r;
392392
NONLEAF_CHILDINFO bnc = BNC(ancestor, childnum);
393393

394394
// Determine the offsets in the message trees between which we need to
395395
// apply messages from this buffer
396396
STAT64INFO_S stats_delta = {0, 0};
397397
uint64_t workdone_this_ancestor = 0;
398+
int64_t logical_rows_delta = 0;
398399

399400
uint32_t stale_lbi, stale_ube;
400401
if (!bn->stale_ancestor_messages_applied) {
@@ -470,7 +471,7 @@ static void bnc_apply_messages_to_basement_node(
470471
gc_info,
471472
&workdone_this_ancestor,
472473
&stats_delta,
473-
logical_rows_delta);
474+
&logical_rows_delta);
474475
}
475476
} else if (stale_lbi == stale_ube) {
476477
// No stale messages to apply, we just apply fresh messages, and mark
@@ -482,7 +483,7 @@ static void bnc_apply_messages_to_basement_node(
482483
.gc_info = gc_info,
483484
.workdone = &workdone_this_ancestor,
484485
.stats_to_update = &stats_delta,
485-
.logical_rows_delta = logical_rows_delta};
486+
.logical_rows_delta = &logical_rows_delta};
486487
if (fresh_ube - fresh_lbi > 0)
487488
*msgs_applied = true;
488489
r = bnc->fresh_message_tree
@@ -503,7 +504,7 @@ static void bnc_apply_messages_to_basement_node(
503504
.gc_info = gc_info,
504505
.workdone = &workdone_this_ancestor,
505506
.stats_to_update = &stats_delta,
506-
.logical_rows_delta = logical_rows_delta};
507+
.logical_rows_delta = &logical_rows_delta};
507508

508509
r = bnc->stale_message_tree
509510
.iterate_on_range<struct iterate_do_bn_apply_msg_extra,
@@ -521,6 +522,8 @@ static void bnc_apply_messages_to_basement_node(
521522
if (stats_delta.numbytes || stats_delta.numrows) {
522523
toku_ft_update_stats(&t->ft->in_memory_stats, stats_delta);
523524
}
525+
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
526+
bn->logical_rows_delta += logical_rows_delta;
524527
}
525528

526529
static void
@@ -534,7 +537,6 @@ apply_ancestors_messages_to_bn(
534537
bool* msgs_applied
535538
)
536539
{
537-
int64_t logical_rows_delta = 0;
538540
BASEMENTNODE curr_bn = BLB(node, childnum);
539541
const pivot_bounds curr_bounds = bounds.next_bounds(node, childnum);
540542
for (ANCESTORS curr_ancestors = ancestors; curr_ancestors; curr_ancestors = curr_ancestors->next) {
@@ -547,16 +549,13 @@ apply_ancestors_messages_to_bn(
547549
curr_ancestors->childnum,
548550
curr_bounds,
549551
gc_info,
550-
msgs_applied,
551-
&logical_rows_delta
552+
msgs_applied
552553
);
553554
// We don't want to check this ancestor node again if the
554555
// next time we query it, the msn hasn't changed.
555556
curr_bn->max_msn_applied = curr_ancestors->node->max_msn_applied_to_node_on_disk;
556557
}
557558
}
558-
toku_ft_adjust_logical_row_count(t->ft, logical_rows_delta);
559-
node->logical_rows_delta += logical_rows_delta;
560559
// At this point, we know all the stale messages above this
561560
// basement node have been applied, and any new messages will be
562561
// fresh, so we don't need to look at stale messages for this

storage/tokudb/PerconaFT/ft/node.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,11 +175,6 @@ struct ftnode {
175175
int height;
176176
int dirty;
177177
uint32_t fullhash;
178-
// current count of rows add or removed as a result of message application
179-
// to this node as a basement, irrelevant for internal nodes, gets reset
180-
// when node is undirtied. Used to back out tree scoped LRC id node is
181-
// evicted but not persisted
182-
int64_t logical_rows_delta;
183178

184179
// for internal nodes, if n_children==fanout+1 then the tree needs to be
185180
// rebalanced. for leaf nodes, represents number of basement nodes
@@ -211,6 +206,10 @@ struct ftnode_leaf_basement_node {
211206
unsigned int seqinsert; // number of sequential inserts to this leaf
212207
MSN max_msn_applied; // max message sequence number applied
213208
bool stale_ancestor_messages_applied;
209+
// current count of rows added or removed as a result of message application
210+
// to this basement node, gets reset when node is undirtied.
211+
// Used to back out tree scoped LRC if node is evicted but not persisted
212+
int64_t logical_rows_delta;
214213
STAT64INFO_S stat64_delta; // change in stat64 counters since basement was last written to disk
215214
};
216215
typedef struct ftnode_leaf_basement_node *BASEMENTNODE;
@@ -385,6 +384,16 @@ enum reactivity toku_ftnode_get_reactivity(FT ft, FTNODE node);
385384
enum reactivity toku_ftnode_get_nonleaf_reactivity(FTNODE node, unsigned int fanout);
386385
enum reactivity toku_ftnode_get_leaf_reactivity(FTNODE node, uint32_t nodesize);
387386

387+
inline const char* toku_ftnode_get_cachefile_fname_in_env(FTNODE node) {
388+
if (node->ct_pair) {
389+
CACHEFILE cf = toku_pair_get_cachefile(node->ct_pair);
390+
if (cf) {
391+
return toku_cachefile_fname_in_env(cf);
392+
}
393+
}
394+
return nullptr;
395+
}
396+
388397
/**
389398
* Finds the next child for HOT to flush to, given that everything up to
390399
* and including k has been flattened.
@@ -577,3 +586,4 @@ static inline void set_BSB(FTNODE node, int i, struct sub_block *sb) {
577586
#define BLB_DATA(node,i) (&(BLB(node,i)->data_buffer))
578587
#define BLB_NBYTESINDATA(node,i) (BLB_DATA(node,i)->get_disk_size())
579588
#define BLB_SEQINSERT(node,i) (BLB(node,i)->seqinsert)
589+
#define BLB_LRD(node, i) (BLB(node,i)->logical_rows_delta)

0 commit comments

Comments
 (0)