Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce iterator key comparison for upper/lower bound check #5111

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions db/db_iter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ bool DBIter::FindNextUserEntryInternal(bool skipping, bool prefix_check) {
return false;
}

if (iterate_upper_bound_ != nullptr &&
if (iterate_upper_bound_ != nullptr && !iter_->HintWithinUpperBound() &&
yiwu-arbug marked this conversation as resolved.
Show resolved Hide resolved
user_comparator_.Compare(ikey_.user_key, *iterate_upper_bound_) >= 0) {
break;
}
Expand Down Expand Up @@ -838,7 +838,7 @@ void DBIter::PrevInternal() {
return;
}

if (iterate_lower_bound_ != nullptr &&
if (iterate_lower_bound_ != nullptr && !iter_->HintWithinLowerBound() &&
user_comparator_.Compare(saved_key_.GetUserKey(),
*iterate_lower_bound_) < 0) {
// We've iterated earlier than the user-specified lower bound.
Expand Down
72 changes: 36 additions & 36 deletions db/db_iterator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1027,48 +1027,48 @@ TEST_P(DBIteratorTest, DBIteratorBoundMultiSeek) {
#endif

TEST_P(DBIteratorTest, DBIteratorBoundOptimizationTest) {
int upper_bound_hits = 0;
Options options = CurrentOptions();
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
[&upper_bound_hits](void* arg) {
assert(arg != nullptr);
upper_bound_hits += (*static_cast<bool*>(arg) ? 1 : 0);
});
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
options.env = env_;
options.create_if_missing = true;
options.prefix_extractor = nullptr;
BlockBasedTableOptions table_options;
table_options.flush_block_policy_factory =
std::make_shared<FlushBlockEveryKeyPolicyFactory>();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));
for (auto format_version : {2, 3, 4}) {
int upper_bound_hits = 0;
Options options = CurrentOptions();
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"BlockBasedTableIterator:out_of_bound",
[&upper_bound_hits](void*) { upper_bound_hits++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();
options.env = env_;
options.create_if_missing = true;
options.prefix_extractor = nullptr;
BlockBasedTableOptions table_options;
table_options.format_version = format_version;
table_options.flush_block_policy_factory =
std::make_shared<FlushBlockEveryKeyPolicyFactory>();
options.table_factory.reset(NewBlockBasedTableFactory(table_options));

DestroyAndReopen(options);
ASSERT_OK(Put("foo1", "bar1"));
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Flush());
DestroyAndReopen(options);
ASSERT_OK(Put("foo1", "bar1"));
ASSERT_OK(Put("foo2", "bar2"));
ASSERT_OK(Put("foo4", "bar4"));
ASSERT_OK(Flush());

Slice ub("foo3");
ReadOptions ro;
ro.iterate_upper_bound = &ub;
Slice ub("foo3");
ReadOptions ro;
ro.iterate_upper_bound = &ub;

std::unique_ptr<Iterator> iter(NewIterator(ro));
std::unique_ptr<Iterator> iter(NewIterator(ro));

iter->Seek("foo");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
ASSERT_EQ(upper_bound_hits, 0);
iter->Seek("foo");
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Slice("foo1")), 0);
ASSERT_EQ(upper_bound_hits, 0);

iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
ASSERT_EQ(upper_bound_hits, 0);
iter->Next();
ASSERT_TRUE(iter->Valid());
ASSERT_EQ(iter->key().compare(Slice("foo2")), 0);
ASSERT_EQ(upper_bound_hits, 0);

iter->Next();
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(upper_bound_hits, 1);
iter->Next();
ASSERT_FALSE(iter->Valid());
ASSERT_EQ(upper_bound_hits, 1);
}
}
// TODO(3.13): fix the issue of Seek() + Prev() which might not necessary
// return the biggest key which is smaller than the seek key.
Expand Down
5 changes: 3 additions & 2 deletions db/table_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ InternalIterator* TableCache::NewIterator(
TableReader** table_reader_ptr, HistogramImpl* file_read_hist,
bool for_compaction, Arena* arena, bool skip_filters, int level,
const InternalKey* smallest_compaction_key,
const InternalKey* largest_compaction_key) {
const InternalKey* largest_compaction_key, bool hint_within_upper_bound) {
PERF_TIMER_GUARD(new_table_iterator_nanos);

Status s;
Expand Down Expand Up @@ -247,7 +247,8 @@ InternalIterator* TableCache::NewIterator(
result = NewEmptyInternalIterator<Slice>(arena);
} else {
result = table_reader->NewIterator(options, prefix_extractor, arena,
skip_filters, for_compaction);
skip_filters, for_compaction,
hint_within_upper_bound);
}
if (create_new_table_reader) {
assert(handle == nullptr);
Expand Down
7 changes: 6 additions & 1 deletion db/table_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ class TableCache {
// aggregator. If an error occurs, returns it in a NewErrorInternalIterator
// @param skip_filters Disables loading/accessing the filter block
// @param level The level this table is at, -1 for "not set / don't know"
// @param hint_within_lower_bound hint whether the table is fully within
// iterate_lower_bound.
// @param hint_within_upper_bound hint whether the table is fully within
// iterate_upper_bound.
InternalIterator* NewIterator(
const ReadOptions& options, const EnvOptions& toptions,
const InternalKeyComparator& internal_comparator,
Expand All @@ -58,7 +62,8 @@ class TableCache {
HistogramImpl* file_read_hist = nullptr, bool for_compaction = false,
Arena* arena = nullptr, bool skip_filters = false, int level = -1,
const InternalKey* smallest_compaction_key = nullptr,
const InternalKey* largest_compaction_key = nullptr);
const InternalKey* largest_compaction_key = nullptr,
bool hint_within_upper_bound = false);

// If a seek to internal key "k" in specified file finds an entry,
// call (*handle_result)(arg, found_key, found_value) repeatedly until
Expand Down
36 changes: 33 additions & 3 deletions db/version_set.cc
Original file line number Diff line number Diff line change
Expand Up @@ -508,23 +508,38 @@ class LevelIterator final : public InternalIterator {
assert(Valid());
return file_iter_.key();
}

Slice value() const override {
assert(Valid());
return file_iter_.value();
}

Status status() const override {
return file_iter_.iter() ? file_iter_.status() : Status::OK();
}

bool HintWithinLowerBound() override {
assert(Valid());
return hint_within_lower_bound_;
}

bool HintWithinUpperBound() override {
assert(Valid());
return file_iter_.HintWithinUpperBound();
yiwu-arbug marked this conversation as resolved.
Show resolved Hide resolved
}

void SetPinnedItersMgr(PinnedIteratorsManager* pinned_iters_mgr) override {
pinned_iters_mgr_ = pinned_iters_mgr;
if (file_iter_.iter()) {
file_iter_.SetPinnedItersMgr(pinned_iters_mgr);
}
}

bool IsKeyPinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
file_iter_.iter() && file_iter_.IsKeyPinned();
}

bool IsValuePinned() const override {
return pinned_iters_mgr_ && pinned_iters_mgr_->PinningEnabled() &&
file_iter_.iter() && file_iter_.IsValuePinned();
Expand All @@ -541,6 +556,11 @@ class LevelIterator final : public InternalIterator {
return flevel_->files[file_index].smallest_key;
}

const Slice& file_largest_key(size_t file_index) {
assert(file_index < flevel_->num_files);
return flevel_->files[file_index].largest_key;
}

bool KeyReachedUpperBound(const Slice& internal_key) {
return read_options_.iterate_upper_bound != nullptr &&
user_comparator_.Compare(ExtractUserKey(internal_key),
Expand All @@ -560,12 +580,21 @@ class LevelIterator final : public InternalIterator {
smallest_compaction_key = (*compaction_boundaries_)[file_index_].smallest;
largest_compaction_key = (*compaction_boundaries_)[file_index_].largest;
}
hint_within_lower_bound_ =
read_options_.iterate_lower_bound != nullptr &&
user_comparator_.Compare(ExtractUserKey(file_smallest_key(file_index_)),
*read_options_.iterate_lower_bound) >= 0;
bool hint_within_upper_bound =
read_options_.iterate_upper_bound != nullptr &&
user_comparator_.Compare(ExtractUserKey(file_largest_key(file_index_)),
*read_options_.iterate_upper_bound) < 0;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do allow users to change upper bound for a new Seek(). So maybe we should do it in InitFileIterator and persistent the result in this iterator, even if we don't create a new file iterator.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good point, will fix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

wait, the upper bound check result won't change even in InitFileIterator we don't create new file iterator, so it is safe to not updating it in that case. Do I miss anything?

return table_cache_->NewIterator(
read_options_, env_options_, icomparator_, *file_meta.file_metadata,
range_del_agg_, prefix_extractor_,
nullptr /* don't need reference to table */,
file_read_hist_, for_compaction_, nullptr /* arena */, skip_filters_,
level_, smallest_compaction_key, largest_compaction_key);
nullptr /* don't need reference to table */, file_read_hist_,
for_compaction_, nullptr /* arena */, skip_filters_, level_,
smallest_compaction_key, largest_compaction_key,
hint_within_upper_bound);
yiwu-arbug marked this conversation as resolved.
Show resolved Hide resolved
}

TableCache* table_cache_;
Expand All @@ -581,6 +610,7 @@ class LevelIterator final : public InternalIterator {
bool should_sample_;
bool for_compaction_;
bool skip_filters_;
bool hint_within_lower_bound_ = false;
size_t file_index_;
int level_;
RangeDelAggregator* range_del_agg_;
Expand Down
7 changes: 7 additions & 0 deletions table/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,13 @@ class IndexBlockIter final : public BlockIter<BlockHandle> {
value_delta_encoded_ = !value_is_full;
}

Slice user_key() const override {
if (key_includes_seq_) {
return ExtractUserKey(key());
}
return key();
}

virtual BlockHandle value() const override {
assert(Valid());
if (value_delta_encoded_) {
Expand Down
60 changes: 37 additions & 23 deletions table/block_based_table_reader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2346,6 +2346,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::Seek(const Slice& target) {
block_iter_.Seek(target);

FindKeyForward();
CheckOutOfBound();
assert(
!block_iter_.Valid() ||
(key_includes_seq_ && icomp_.Compare(target, block_iter_.key()) <= 0) ||
Expand Down Expand Up @@ -2409,6 +2410,7 @@ void BlockBasedTableIterator<TBlockIter, TValue>::SeekToFirst() {
InitDataBlock();
block_iter_.SeekToFirst();
FindKeyForward();
CheckOutOfBound();
}

template <class TBlockIter, typename TValue>
Expand Down Expand Up @@ -2486,23 +2488,37 @@ void BlockBasedTableIterator<TBlockIter, TValue>::InitDataBlock() {
key_includes_seq_, index_key_is_full_,
/* get_context */ nullptr, s, prefetch_buffer_.get());
block_iter_points_to_real_block_ = true;
if (read_options_.iterate_upper_bound != nullptr) {
data_block_within_upper_bound_ =
hint_within_upper_bound_ ||
(user_comparator_.Compare(*read_options_.iterate_upper_bound,
index_iter_->user_key()) > 0);
}
}
}

template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
assert(!is_out_of_bound_);
// TODO the while loop inherits from two-level-iterator. We don't know
// whether a block can be empty so it can be replaced by an "if".
while (!block_iter_.Valid()) {
if (!block_iter_.status().ok()) {
return;
}
bool block_is_out_of_bound = false;
if (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_) {
// Do not set is_out_of_bound_ here. Even when index key is out of bound,
// it can be larger than smallest key of the next file on the level, and
// that file may still be within bound.
block_is_out_of_bound = !data_block_within_upper_bound_;
}
ResetDataIter();
// We used to check the current index key for upperbound.
// It will only save a data reading for a small percentage of use cases,
// so for code simplicity, we removed it. We can add it back if there is a
// significnat performance regression.
if (block_is_out_of_bound) {
// The next block is out of bound. No need to read it.
TEST_SYNC_POINT_CALLBACK("BlockBasedTableIterator:out_of_bound", nullptr);
return;
}
index_iter_->Next();

if (index_iter_->Valid()) {
Expand All @@ -2512,25 +2528,10 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyForward() {
return;
}
}

// Check upper bound on the current key
bool reached_upper_bound =
(read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && block_iter_.Valid() &&
user_comparator_.Compare(ExtractUserKey(block_iter_.key()),
*read_options_.iterate_upper_bound) >= 0);
TEST_SYNC_POINT_CALLBACK(
"BlockBasedTable::BlockEntryIteratorState::KeyReachedUpperBound",
&reached_upper_bound);
if (reached_upper_bound) {
is_out_of_bound_ = true;
return;
}
}

template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
assert(!is_out_of_bound_);
while (!block_iter_.Valid()) {
if (!block_iter_.status().ok()) {
return;
Expand All @@ -2551,9 +2552,20 @@ void BlockBasedTableIterator<TBlockIter, TValue>::FindKeyBackward() {
// code simplicity.
}

template <class TBlockIter, typename TValue>
void BlockBasedTableIterator<TBlockIter, TValue>::CheckOutOfBound() {
if (read_options_.iterate_upper_bound != nullptr &&
block_iter_points_to_real_block_ && block_iter_.Valid()) {
is_out_of_bound_ = !hint_within_upper_bound_ &&
user_comparator_.Compare(
*read_options_.iterate_upper_bound, user_key()) <= 0;
}
}

InternalIterator* BlockBasedTable::NewIterator(
const ReadOptions& read_options, const SliceTransform* prefix_extractor,
Arena* arena, bool skip_filters, bool for_compaction) {
Arena* arena, bool skip_filters, bool for_compaction,
bool hint_within_upper_bound) {
bool need_upper_bound_check =
PrefixExtractorChanged(rep_->table_properties.get(), prefix_extractor);
const bool kIsNotIndex = false;
Expand All @@ -2567,7 +2579,8 @@ InternalIterator* BlockBasedTable::NewIterator(
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, kIsNotIndex,
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
hint_within_upper_bound);
} else {
auto* mem =
arena->AllocateAligned(sizeof(BlockBasedTableIterator<DataBlockIter>));
Expand All @@ -2577,7 +2590,8 @@ InternalIterator* BlockBasedTable::NewIterator(
!skip_filters && !read_options.total_order_seek &&
prefix_extractor != nullptr,
need_upper_bound_check, prefix_extractor, kIsNotIndex,
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction);
true /*key_includes_seq*/, true /*index_key_is_full*/, for_compaction,
hint_within_upper_bound);
}
}

Expand Down
Loading