Skip to content

Commit 757e756

Browse files
committed
MDEV-22850 Reduce buf_pool.page_hash latch contention
For reads, the buf_pool.page_hash is protected by buf_pool.mutex or by the hash_lock. There is no need to compute or acquire hash_lock if we are not modifying the buf_pool.page_hash. However, the buf_pool.page_hash latch must be held exclusively when changing buf_page_t::in_file(), or if we desire to prevent buf_page_t::can_relocate() or buf_page_t::buf_fix_count() from changing. rw_lock_lock_word_decr(): Add a comment that explains the polling logic. buf_page_t::set_state(): When in_file() is to be changed, assert that an exclusive buf_pool.page_hash latch is being held. Unfortunately we cannot assert this for set_state(BUF_BLOCK_REMOVE_HASH) because set_corrupt_id() may already have been called. buf_LRU_free_page(): Check buf_page_t::can_relocate() before aqcuiring the hash_lock. buf_block_t::initialise(): Initialize also page.buf_fix_count(). buf_page_create(): Initialize buf_fix_count while not holding any mutex or hash_lock. Acquire the hash_lock only for the duration of inserting the block to the buf_pool.page_hash. buf_LRU_old_init(), buf_LRU_add_block(), buf_page_t::belongs_to_unzip_LRU(): Do not assert buf_page_t::in_file(), because buf_page_create() will invoke buf_LRU_add_block() before acquiring hash_lock and buf_page_t::set_state(). buf_pool_t::validate(): Rely on the buf_pool.mutex and do not unnecessarily acquire any buf_pool.page_hash latches. buf_page_init_for_read(): Clarify that we must acquire the hash_lock upfront in order to prevent a race with buf_pool_t::watch_remove().
1 parent 0af1b0b commit 757e756

File tree

5 files changed

+88
-25
lines changed

5 files changed

+88
-25
lines changed

storage/innobase/buf/buf0buf.cc

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3776,15 +3776,16 @@ buf_page_try_get_func(
37763776
}
37773777

37783778
/** Initialize the block.
3779-
@param page_id page id
3780-
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
3781-
void buf_block_t::initialise(const page_id_t page_id, ulint zip_size)
3779+
@param page_id page identifier
3780+
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
3781+
@param fix initial buf_fix_count() */
3782+
void buf_block_t::initialise(const page_id_t page_id, ulint zip_size,
3783+
uint32_t fix)
37823784
{
37833785
ut_ad(page.state() != BUF_BLOCK_FILE_PAGE);
37843786
buf_block_init_low(this);
37853787
lock_hash_val= lock_rec_hash(page_id.space(), page_id.page_no());
3786-
page.init();
3787-
page.id_= page_id;
3788+
page.init(page_id, fix);
37883789
page_zip_set_size(&page.zip, zip_size);
37893790
}
37903791

@@ -3803,11 +3804,9 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
38033804
ut_ad(page_id.space() != 0 || !zip_size);
38043805

38053806
buf_block_t *free_block= buf_LRU_get_free_block(false);
3806-
free_block->initialise(page_id, zip_size);
3807+
free_block->initialise(page_id, zip_size, 1);
38073808

3808-
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
38093809
mutex_enter(&buf_pool.mutex);
3810-
rw_lock_x_lock(hash_lock);
38113810

38123811
buf_block_t *block= reinterpret_cast<buf_block_t*>
38133812
(buf_pool.page_hash_get_low(page_id));
@@ -3816,7 +3815,6 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
38163815
!buf_pool.watch_is_sentinel(block->page))
38173816
{
38183817
/* Page can be found in buf_pool */
3819-
rw_lock_x_unlock(hash_lock);
38203818
buf_LRU_block_free_non_file_page(free_block);
38213819
mutex_exit(&buf_pool.mutex);
38223820

@@ -3846,11 +3844,17 @@ buf_page_create(const page_id_t page_id, ulint zip_size, mtr_t *mtr)
38463844
page_id.space(), page_id.page_no()));
38473845

38483846
block= free_block;
3849-
buf_block_buf_fix_inc(block, __FILE__, __LINE__);
3847+
3848+
/* Duplicate buf_block_buf_fix_inc_func() */
3849+
ut_ad(block->page.buf_fix_count() == 1);
3850+
ut_ad(fsp_is_system_temporary(page_id.space()) ||
3851+
rw_lock_s_lock_nowait(block->debug_latch, __FILE__, __LINE__));
38503852

38513853
/* The block must be put to the LRU list */
3852-
block->page.set_state(BUF_BLOCK_FILE_PAGE);
38533854
buf_LRU_add_block(&block->page, false);
3855+
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
3856+
rw_lock_x_lock(hash_lock);
3857+
block->page.set_state(BUF_BLOCK_FILE_PAGE);
38543858
ut_d(block->page.in_page_hash= true);
38553859
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(),
38563860
&block->page);
@@ -4401,7 +4405,6 @@ void buf_pool_t::validate()
44014405
ulint n_zip = 0;
44024406

44034407
mutex_enter(&mutex);
4404-
page_hash_lock_all();
44054408

44064409
chunk_t* chunk = chunks;
44074410

@@ -4492,7 +4495,6 @@ void buf_pool_t::validate()
44924495

44934496
ut_ad(UT_LIST_GET_LEN(flush_list) == n_flushing);
44944497

4495-
page_hash_unlock_all();
44964498
mutex_exit(&flush_list_mutex);
44974499

44984500
if (curr_size == old_size

storage/innobase/buf/buf0lru.cc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -940,7 +940,6 @@ static void buf_LRU_old_init()
940940
bpage = UT_LIST_GET_PREV(LRU, bpage)) {
941941

942942
ut_ad(bpage->in_LRU_list);
943-
ut_ad(bpage->in_file());
944943

945944
/* This loop temporarily violates the
946945
assertions of buf_page_t::set_old(). */
@@ -1066,7 +1065,6 @@ buf_LRU_add_block(
10661065
the start, regardless of this parameter */
10671066
{
10681067
ut_ad(mutex_own(&buf_pool.mutex));
1069-
ut_a(bpage->in_file());
10701068
ut_ad(!bpage->in_LRU_list);
10711069

10721070
if (!old || (UT_LIST_GET_LEN(buf_pool.LRU) < BUF_LRU_OLD_MIN_LEN)) {
@@ -1153,10 +1151,18 @@ bool buf_LRU_free_page(buf_page_t *bpage, bool zip)
11531151
ut_ad(bpage->in_file());
11541152
ut_ad(bpage->in_LRU_list);
11551153

1154+
/* First, perform a quick check before we acquire hash_lock. */
1155+
if (!bpage->can_relocate()) {
1156+
return false;
1157+
}
1158+
1159+
/* We must hold an exclusive hash_lock to prevent
1160+
bpage->can_relocate() from changing due to a concurrent
1161+
execution of buf_page_get_low(). */
11561162
rw_lock_t* hash_lock = buf_pool.hash_lock_get(id);
11571163
rw_lock_x_lock(hash_lock);
11581164

1159-
if (!bpage->can_relocate()) {
1165+
if (UNIV_UNLIKELY(!bpage->can_relocate())) {
11601166
/* Do not free buffer fixed and I/O-fixed blocks. */
11611167
goto func_exit;
11621168
}

storage/innobase/buf/buf0rea.cc

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
121121

122122
mutex_enter(&buf_pool.mutex);
123123

124+
/* We must acquire hash_lock this early to prevent
125+
a race condition with buf_pool_t::watch_remove() */
124126
rw_lock_t *hash_lock= buf_pool.hash_lock_get(page_id);
125127
rw_lock_x_lock(hash_lock);
126128

@@ -151,8 +153,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
151153
buf_pool.watch_remove(hash_page);
152154
}
153155

154-
block->page.set_state(BUF_BLOCK_FILE_PAGE);
155156
block->page.set_io_fix(BUF_IO_READ);
157+
block->page.set_state(BUF_BLOCK_FILE_PAGE);
156158
ut_ad(!block->page.in_page_hash);
157159
ut_d(block->page.in_page_hash= true);
158160
HASH_INSERT(buf_page_t, hash, buf_pool.page_hash, page_id.fold(), bpage);
@@ -202,8 +204,8 @@ static buf_page_t* buf_page_init_for_read(ulint mode, const page_id_t page_id,
202204
{
203205
/* The block was added by some other thread. */
204206
rw_lock_x_unlock(hash_lock);
205-
buf_buddy_free(data, zip_size);
206-
goto func_exit;
207+
buf_buddy_free(data, zip_size);
208+
goto func_exit;
207209
}
208210
}
209211

storage/innobase/include/buf0buf.h

Lines changed: 36 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -995,6 +995,14 @@ class buf_page_t
995995
buf_fix_count_= buf_fix_count;
996996
}
997997

998+
/** Initialize some more fields */
999+
void init(page_id_t id, uint32_t buf_fix_count= 0)
1000+
{
1001+
init();
1002+
id_= id;
1003+
buf_fix_count_= buf_fix_count;
1004+
}
1005+
9981006
public:
9991007
const page_id_t &id() const { return id_; }
10001008
buf_page_state state() const { return state_; }
@@ -1010,8 +1018,7 @@ class buf_page_t
10101018
/** @return if this belongs to buf_pool.unzip_LRU */
10111019
bool belongs_to_unzip_LRU() const
10121020
{
1013-
ut_ad(in_file());
1014-
return zip.data && state() == BUF_BLOCK_FILE_PAGE;
1021+
return zip.data && state() != BUF_BLOCK_ZIP_PAGE;
10151022
}
10161023

10171024
inline void add_buf_fix_count(uint32_t count);
@@ -1277,9 +1284,10 @@ struct buf_block_t{
12771284
ulint zip_size() const { return page.zip_size(); }
12781285

12791286
/** Initialize the block.
1280-
@param page_id page id
1281-
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0 */
1282-
void initialise(const page_id_t page_id, ulint zip_size);
1287+
@param page_id page identifier
1288+
@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
1289+
@param fix initial buf_fix_count() */
1290+
void initialise(const page_id_t page_id, ulint zip_size, uint32_t fix= 0);
12831291
};
12841292

12851293
/**********************************************************************//**
@@ -2134,6 +2142,29 @@ inline void buf_page_t::set_buf_fix_count(uint32_t count)
21342142
inline void buf_page_t::set_state(buf_page_state state)
21352143
{
21362144
ut_ad(mutex_own(&buf_pool.mutex));
2145+
#ifdef UNIV_DEBUG
2146+
switch (state) {
2147+
case BUF_BLOCK_REMOVE_HASH:
2148+
/* buf_pool_t::corrupted_evict() invokes set_corrupt_id()
2149+
before buf_LRU_free_one_page(), so we cannot assert that
2150+
we are holding the hash_lock. */
2151+
break;
2152+
case BUF_BLOCK_MEMORY:
2153+
if (!in_file()) break;
2154+
/* fall through */
2155+
case BUF_BLOCK_FILE_PAGE:
2156+
ut_ad(rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
2157+
break;
2158+
case BUF_BLOCK_NOT_USED:
2159+
if (!in_file()) break;
2160+
/* fall through */
2161+
case BUF_BLOCK_ZIP_PAGE:
2162+
ut_ad((this >= &buf_pool.watch[0] &&
2163+
this <= &buf_pool.watch[UT_ARR_SIZE(buf_pool.watch)]) ||
2164+
rw_lock_own(buf_pool.hash_lock_get(id_), RW_LOCK_X));
2165+
break;
2166+
}
2167+
#endif
21372168
state_= state;
21382169
}
21392170

@@ -2212,7 +2243,6 @@ inline bool buf_page_t::is_old() const
22122243
/** Set whether a block is old in buf_pool.LRU */
22132244
inline void buf_page_t::set_old(bool old)
22142245
{
2215-
ut_ad(in_file());
22162246
ut_ad(mutex_own(&buf_pool.mutex));
22172247
ut_ad(in_LRU_list);
22182248

storage/innobase/include/sync0rw.ic

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,8 +220,31 @@ rw_lock_lock_word_decr(
220220

221221
return(true);
222222
}
223+
224+
/* Note that lock_copy was reloaded above. We will
225+
keep trying if a spurious conflict occurred, typically
226+
caused by concurrent executions of
227+
rw_lock_s_lock(). */
228+
229+
#if 1 /* FIXME: MDEV-22871 Spurious contention between rw_lock_s_lock() */
230+
231+
/* When the number of concurrently executing threads
232+
exceeds the number of available processor cores,
233+
multiple buf_pool.page_hash S-latch requests would
234+
conflict here, mostly in buf_page_get_low(). We should
235+
implement a simpler rw-lock where the S-latch
236+
acquisition would be a simple fetch_add(1) followed by
237+
either an optional load() loop to wait for the X-latch
238+
to be released, or a fetch_sub(1) and a retry.
239+
240+
For now, we work around the problem with a delay in
241+
this loop. It helped a little on some systems and was
242+
reducing performance on others. */
223243
(void) LF_BACKOFF();
244+
#endif
224245
}
246+
247+
/* A real conflict was detected. */
225248
return(false);
226249
}
227250

0 commit comments

Comments
 (0)