Skip to content

Commit

Permalink
Add block cache tracer. (facebook#5410)
Browse files Browse the repository at this point in the history
Summary:
This PR adds a help class block cache tracer to read/write block cache accesses. It uses the trace reader/writer to perform this task.
Pull Request resolved: facebook#5410

Differential Revision: D15612843

Pulled By: HaoyuHuang

fbshipit-source-id: f30fd1e1524355ca87db5d533a5c086728b141ea
  • Loading branch information
HaoyuHuang authored and facebook-github-bot committed Jun 6, 2019
1 parent 340ed4f commit aa71718
Show file tree
Hide file tree
Showing 8 changed files with 538 additions and 12 deletions.
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@ set(SOURCES
tools/sst_dump_tool.cc
tools/trace_analyzer_tool.cc
trace_replay/trace_replay.cc
trace_replay/block_cache_tracer.cc
util/bloom.cc
util/coding.cc
util/compaction_job_stats_impl.cc
Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ TESTS = \
range_del_aggregator_test \
sst_file_reader_test \
db_secondary_test \
block_cache_tracer_test \

PARALLEL_TEST = \
backupable_db_test \
Expand Down Expand Up @@ -1588,6 +1589,9 @@ sst_file_reader_test: table/sst_file_reader_test.o $(LIBOBJECTS) $(TESTHARNESS)
db_secondary_test: db/db_impl/db_secondary_test.o db/db_test_util.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

block_cache_tracer_test: trace_replay/block_cache_tracer_test.o trace_replay/block_cache_tracer.o $(LIBOBJECTS) $(TESTHARNESS)
$(AM_LINK)

#-------------------------------------------------
# make install related stuff
INSTALL_PATH ?= /usr/local
Expand Down
2 changes: 2 additions & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ LIB_SOURCES = \
test_util/transaction_test_util.cc \
tools/dump/db_dump_tool.cc \
trace_replay/trace_replay.cc \
trace_replay/block_cache_tracer.cc \
util/bloom.cc \
util/build_version.cc \
util/coding.cc \
Expand Down Expand Up @@ -371,6 +372,7 @@ MAIN_SOURCES = \
tools/reduce_levels_test.cc \
tools/sst_dump_test.cc \
tools/trace_analyzer_test.cc \
trace_replay/block_cache_tracer_test.cc \
util/autovector_test.cc \
util/bloom_test.cc \
util/coding_test.cc \
Expand Down
218 changes: 218 additions & 0 deletions trace_replay/block_cache_tracer.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include "trace_replay/block_cache_tracer.h"

#include "db/db_impl/db_impl.h"
#include "rocksdb/slice.h"
#include "util/coding.h"
#include "util/hash.h"
#include "util/string_util.h"

namespace rocksdb {

namespace {
const unsigned int kCharSize = 1;
bool ShouldTraceReferencedKey(const BlockCacheTraceRecord& record) {
return (record.block_type == TraceType::kBlockTraceDataBlock) &&
(record.caller == BlockCacheLookupCaller::kUserGet ||
record.caller == BlockCacheLookupCaller::kUserMGet);
}
} // namespace

BlockCacheTraceWriter::BlockCacheTraceWriter(
Env* env, const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer)
: env_(env),
trace_options_(trace_options),
trace_writer_(std::move(trace_writer)) {}

bool BlockCacheTraceWriter::ShouldTrace(
const BlockCacheTraceRecord& record) const {
if (trace_options_.sampling_frequency == 0 ||
trace_options_.sampling_frequency == 1) {
return true;
}
// We use spatial downsampling so that we have a complete access history for a
// block.
const uint64_t hash = GetSliceNPHash64(Slice(record.block_key));
return hash % trace_options_.sampling_frequency == 0;
}

Status BlockCacheTraceWriter::WriteBlockAccess(
const BlockCacheTraceRecord& record) {
uint64_t trace_file_size = trace_writer_->GetFileSize();
if (trace_file_size > trace_options_.max_trace_file_size ||
!ShouldTrace(record)) {
return Status::OK();
}
Trace trace;
trace.ts = record.access_timestamp;
trace.type = record.block_type;
PutLengthPrefixedSlice(&trace.payload, record.block_key);
PutFixed64(&trace.payload, record.block_size);
PutFixed32(&trace.payload, record.cf_id);
PutLengthPrefixedSlice(&trace.payload, record.cf_name);
PutFixed32(&trace.payload, record.level);
PutFixed32(&trace.payload, record.sst_fd_number);
trace.payload.push_back(record.caller);
trace.payload.push_back(record.is_cache_hit);
trace.payload.push_back(record.no_insert);
if (ShouldTraceReferencedKey(record)) {
PutLengthPrefixedSlice(&trace.payload, record.referenced_key);
PutFixed64(&trace.payload, record.num_keys_in_block);
trace.payload.push_back(record.is_referenced_key_exist_in_block);
}
std::string encoded_trace;
TracerHelper::EncodeTrace(trace, &encoded_trace);
InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
return trace_writer_->Write(encoded_trace);
}

Status BlockCacheTraceWriter::WriteHeader() {
Trace trace;
trace.ts = env_->NowMicros();
trace.type = TraceType::kTraceBegin;
PutLengthPrefixedSlice(&trace.payload, kTraceMagic);
PutFixed32(&trace.payload, kMajorVersion);
PutFixed32(&trace.payload, kMinorVersion);
std::string encoded_trace;
TracerHelper::EncodeTrace(trace, &encoded_trace);
InstrumentedMutexLock lock_guard(&trace_writer_mutex_);
return trace_writer_->Write(encoded_trace);
}

BlockCacheTraceReader::BlockCacheTraceReader(
std::unique_ptr<TraceReader>&& reader)
: trace_reader_(std::move(reader)) {}

Status BlockCacheTraceReader::ReadHeader(BlockCacheTraceHeader* header) {
assert(header != nullptr);
std::string encoded_trace;
Status s = trace_reader_->Read(&encoded_trace);
if (!s.ok()) {
return s;
}
Trace trace;
s = TracerHelper::DecodeTrace(encoded_trace, &trace);
if (!s.ok()) {
return s;
}
header->start_time = trace.ts;
Slice enc_slice = Slice(trace.payload);
Slice magnic_number;
if (!GetLengthPrefixedSlice(&enc_slice, &magnic_number)) {
return Status::Corruption(
"Corrupted header in the trace file: Failed to read the magic number.");
}
if (magnic_number.ToString() != kTraceMagic) {
return Status::Corruption(
"Corrupted header in the trace file: Magic number does not match.");
}
if (!GetFixed32(&enc_slice, &header->rocksdb_major_version)) {
return Status::Corruption(
"Corrupted header in the trace file: Failed to read rocksdb major "
"version number.");
}
if (!GetFixed32(&enc_slice, &header->rocksdb_minor_version)) {
return Status::Corruption(
"Corrupted header in the trace file: Failed to read rocksdb minor "
"version number.");
}
// We should have retrieved all information in the header.
if (!enc_slice.empty()) {
return Status::Corruption(
"Corrupted header in the trace file: The length of header is too "
"long.");
}
return Status::OK();
}

Status BlockCacheTraceReader::ReadAccess(BlockCacheTraceRecord* record) {
assert(record);
std::string encoded_trace;
Status s = trace_reader_->Read(&encoded_trace);
if (!s.ok()) {
return s;
}
Trace trace;
s = TracerHelper::DecodeTrace(encoded_trace, &trace);
if (!s.ok()) {
return s;
}
record->access_timestamp = trace.ts;
record->block_type = trace.type;
Slice enc_slice = Slice(trace.payload);
Slice block_key;
if (!GetLengthPrefixedSlice(&enc_slice, &block_key)) {
return Status::Incomplete(
"Incomplete access record: Failed to read block key.");
}
record->block_key = block_key.ToString();
if (!GetFixed64(&enc_slice, &record->block_size)) {
return Status::Incomplete(
"Incomplete access record: Failed to read block size.");
}
if (!GetFixed32(&enc_slice, &record->cf_id)) {
return Status::Incomplete(
"Incomplete access record: Failed to read column family ID.");
}
Slice cf_name;
if (!GetLengthPrefixedSlice(&enc_slice, &cf_name)) {
return Status::Incomplete(
"Incomplete access record: Failed to read column family name.");
}
record->cf_name = cf_name.ToString();
if (!GetFixed32(&enc_slice, &record->level)) {
return Status::Incomplete(
"Incomplete access record: Failed to read level.");
}
if (!GetFixed32(&enc_slice, &record->sst_fd_number)) {
return Status::Incomplete(
"Incomplete access record: Failed to read SST file number.");
}
if (enc_slice.empty()) {
return Status::Incomplete(
"Incomplete access record: Failed to read caller.");
}
record->caller = static_cast<BlockCacheLookupCaller>(enc_slice[0]);
enc_slice.remove_prefix(kCharSize);
if (enc_slice.empty()) {
return Status::Incomplete(
"Incomplete access record: Failed to read is_cache_hit.");
}
record->is_cache_hit = static_cast<Boolean>(enc_slice[0]);
enc_slice.remove_prefix(kCharSize);
if (enc_slice.empty()) {
return Status::Incomplete(
"Incomplete access record: Failed to read no_insert.");
}
record->no_insert = static_cast<Boolean>(enc_slice[0]);
enc_slice.remove_prefix(kCharSize);

if (ShouldTraceReferencedKey(*record)) {
Slice referenced_key;
if (!GetLengthPrefixedSlice(&enc_slice, &referenced_key)) {
return Status::Incomplete(
"Incomplete access record: Failed to read the referenced key.");
}
record->referenced_key = referenced_key.ToString();
if (!GetFixed64(&enc_slice, &record->num_keys_in_block)) {
return Status::Incomplete(
"Incomplete access record: Failed to read the number of keys in the "
"block.");
}
if (enc_slice.empty()) {
return Status::Incomplete(
"Incomplete access record: Failed to read "
"is_referenced_key_exist_in_block.");
}
record->is_referenced_key_exist_in_block =
static_cast<Boolean>(enc_slice[0]);
}
return Status::OK();
}

} // namespace rocksdb
105 changes: 105 additions & 0 deletions trace_replay/block_cache_tracer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright (c) 2011-present, Facebook, Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#pragma once

#include "monitoring/instrumented_mutex.h"
#include "rocksdb/env.h"
#include "rocksdb/options.h"
#include "rocksdb/trace_reader_writer.h"
#include "trace_replay/trace_replay.h"

namespace rocksdb {

enum BlockCacheLookupCaller : char {
kUserGet = 1,
kUserMGet = 2,
kUserIterator = 3,
kPrefetch = 4,
kCompaction = 5,
// All callers should be added before kMaxBlockCacheLookupCaller.
kMaxBlockCacheLookupCaller
};

enum Boolean : char { kTrue = 1, kFalse = 0 };

struct BlockCacheTraceRecord {
// Required fields for all accesses.
uint64_t access_timestamp;
std::string block_key;
TraceType block_type;
uint64_t block_size;
uint32_t cf_id;
std::string cf_name;
uint32_t level;
uint32_t sst_fd_number;
BlockCacheLookupCaller caller;
Boolean is_cache_hit;
Boolean no_insert;

// Required fields for data block and user Get/Multi-Get only.
std::string referenced_key;
uint64_t num_keys_in_block = 0;
Boolean is_referenced_key_exist_in_block = Boolean::kFalse;
};

struct BlockCacheTraceHeader {
uint64_t start_time;
uint32_t rocksdb_major_version;
uint32_t rocksdb_minor_version;
};

// BlockCacheTraceWriter captures all RocksDB block cache accesses using a
// user-provided TraceWriter. Every RocksDB operation is written as a single
// trace. Each trace will have a timestamp and type, followed by the trace
// payload.
class BlockCacheTraceWriter {
public:
BlockCacheTraceWriter(Env* env, const TraceOptions& trace_options,
std::unique_ptr<TraceWriter>&& trace_writer);
~BlockCacheTraceWriter() = default;
// No copy and move.
BlockCacheTraceWriter(const BlockCacheTraceWriter&) = delete;
BlockCacheTraceWriter& operator=(const BlockCacheTraceWriter&) = delete;
BlockCacheTraceWriter(BlockCacheTraceWriter&&) = delete;
BlockCacheTraceWriter& operator=(BlockCacheTraceWriter&&) = delete;

Status WriteBlockAccess(const BlockCacheTraceRecord& record);

// Write a trace header at the beginning, typically on initiating a trace,
// with some metadata like a magic number and RocksDB version.
Status WriteHeader();

private:
bool ShouldTrace(const BlockCacheTraceRecord& record) const;

Env* env_;
TraceOptions trace_options_;
std::unique_ptr<TraceWriter> trace_writer_;
/*Mutex to protect trace_writer_ */
InstrumentedMutex trace_writer_mutex_;
};

// BlockCacheTraceReader helps read the trace file generated by
// BlockCacheTraceWriter using a user provided TraceReader.
class BlockCacheTraceReader {
public:
BlockCacheTraceReader(std::unique_ptr<TraceReader>&& reader);
~BlockCacheTraceReader() = default;
// No copy and move.
BlockCacheTraceReader(const BlockCacheTraceReader&) = delete;
BlockCacheTraceReader& operator=(const BlockCacheTraceReader&) = delete;
BlockCacheTraceReader(BlockCacheTraceReader&&) = delete;
BlockCacheTraceReader& operator=(BlockCacheTraceReader&&) = delete;

Status ReadHeader(BlockCacheTraceHeader* header);

Status ReadAccess(BlockCacheTraceRecord* record);

private:
std::unique_ptr<TraceReader> trace_reader_;
};

} // namespace rocksdb
Loading

0 comments on commit aa71718

Please sign in to comment.