Skip to content

Commit

Permalink
Smooth the deletion of WAL files (#5116)
Browse files Browse the repository at this point in the history
Summary:
WAL files are currently not subject to deletion rate limiting by DeleteScheduler. If the size of the WAL files is significant, this can cause a high delete rate on SSDs that may affect other operations. To fix it, force WAL file deletions to go through the SstFileManager. Original PR for this is #2768
Pull Request resolved: #5116

Differential Revision: D14669437

Pulled By: anand1976

fbshipit-source-id: c5f62d0640cebaa1574de841a1d01e4ce2faadf0
  • Loading branch information
anand76 authored and facebook-github-bot committed Mar 28, 2019
1 parent a98317f commit dae3b55
Show file tree
Hide file tree
Showing 8 changed files with 108 additions and 35 deletions.
13 changes: 8 additions & 5 deletions db/db_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2878,8 +2878,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
std::string path_to_delete = dbname + "/" + fname;
if (type == kMetaDatabase) {
del = DestroyDB(path_to_delete, options);
} else if (type == kTableFile) {
del = DeleteSSTFile(&soptions, path_to_delete, dbname);
} else if (type == kTableFile || type == kLogFile) {
del = DeleteDBFile(&soptions, path_to_delete, dbname);
} else {
del = env->DeleteFile(path_to_delete);
}
Expand Down Expand Up @@ -2913,7 +2913,7 @@ Status DestroyDB(const std::string& dbname, const Options& options,
if (ParseFileName(fname, &number, &type) &&
type == kTableFile) { // Lock file will be deleted at end
std::string table_path = path + "/" + fname;
Status del = DeleteSSTFile(&soptions, table_path, dbname);
Status del = DeleteDBFile(&soptions, table_path, dbname);
if (result.ok() && !del.ok()) {
result = del;
}
Expand All @@ -2939,7 +2939,8 @@ Status DestroyDB(const std::string& dbname, const Options& options,
// Delete archival files.
for (const auto& file : archiveFiles) {
if (ParseFileName(file, &number, &type) && type == kLogFile) {
Status del = env->DeleteFile(archivedir + "/" + file);
Status del =
DeleteDBFile(&soptions, archivedir + "/" + file, archivedir);
if (result.ok() && !del.ok()) {
result = del;
}
Expand All @@ -2952,7 +2953,9 @@ Status DestroyDB(const std::string& dbname, const Options& options,
if (wal_dir_exists) {
for (const auto& file : walDirFiles) {
if (ParseFileName(file, &number, &type) && type == kLogFile) {
Status del = env->DeleteFile(LogFileName(soptions.wal_dir, number));
Status del =
DeleteDBFile(&soptions, LogFileName(soptions.wal_dir, number),
soptions.wal_dir);
if (result.ok() && !del.ok()) {
result = del;
}
Expand Down
4 changes: 2 additions & 2 deletions db/db_impl_files.cc
Original file line number Diff line number Diff line change
Expand Up @@ -259,9 +259,9 @@ void DBImpl::DeleteObsoleteFileImpl(int job_id, const std::string& fname,
const std::string& path_to_sync,
FileType type, uint64_t number) {
Status file_deletion_status;
if (type == kTableFile) {
if (type == kTableFile || type == kLogFile) {
file_deletion_status =
DeleteSSTFile(&immutable_db_options_, fname, path_to_sync);
DeleteDBFile(&immutable_db_options_, fname, path_to_sync);
} else {
file_deletion_status = env_->DeleteFile(fname);
}
Expand Down
87 changes: 77 additions & 10 deletions db/db_sst_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -367,14 +367,16 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);

WriteOptions wo;
wo.disableWAL = true;
ASSERT_OK(TryReopen(options));
// Create 4 files in L0
for (char v = 'a'; v <= 'd'; v++) {
ASSERT_OK(Put("Key2", DummyString(1024, v)));
ASSERT_OK(Put("Key3", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Put("Key1", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Put("Key2", DummyString(1024, v), wo));
ASSERT_OK(Put("Key3", DummyString(1024, v), wo));
ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
ASSERT_OK(Put("Key1", DummyString(1024, v), wo));
ASSERT_OK(Put("Key4", DummyString(1024, v), wo));
ASSERT_OK(Flush());
}
// We created 4 sst files in L0
Expand Down Expand Up @@ -408,6 +410,55 @@ TEST_F(DBSSTTest, RateLimitedDelete) {
rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBSSTTest, RateLimitedWALDelete) {
Destroy(last_options_);

std::vector<uint64_t> penalties;
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::BackgroundEmptyTrash:Wait",
[&](void* arg) { penalties.push_back(*(static_cast<uint64_t*>(arg))); });

env_->no_slowdown_ = true;
env_->time_elapse_only_sleep_ = true;
Options options = CurrentOptions();
options.disable_auto_compactions = true;
options.env = env_;

int64_t rate_bytes_per_sec = 1024 * 10; // 10 Kbs / Sec
Status s;
options.sst_file_manager.reset(
NewSstFileManager(env_, nullptr, "", 0, false, &s, 0));
ASSERT_OK(s);
options.sst_file_manager->SetDeleteRateBytesPerSecond(rate_bytes_per_sec);
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());
sfm->delete_scheduler()->SetMaxTrashDBRatio(2.1);

ASSERT_OK(TryReopen(options));
rocksdb::SyncPoint::GetInstance()->EnableProcessing();

// Create 4 files in L0
for (char v = 'a'; v <= 'd'; v++) {
ASSERT_OK(Put("Key2", DummyString(1024, v)));
ASSERT_OK(Put("Key3", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Put("Key1", DummyString(1024, v)));
ASSERT_OK(Put("Key4", DummyString(1024, v)));
ASSERT_OK(Flush());
}
// We created 4 sst files in L0
ASSERT_EQ("4", FilesPerLevel(0));

// Compaction will move the 4 files in L0 to trash and create 1 L1 file
ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
ASSERT_EQ("0,1", FilesPerLevel(0));

sfm->WaitForEmptyTrash();
ASSERT_EQ(penalties.size(), 8);

rocksdb::SyncPoint::GetInstance()->DisableProcessing();
}

TEST_F(DBSSTTest, OpenDBWithExistingTrash) {
Options options = CurrentOptions();

Expand Down Expand Up @@ -446,7 +497,6 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
rocksdb::SyncPoint::GetInstance()->SetCallBack(
"DeleteScheduler::DeleteFile",
[&](void* /*arg*/) { bg_delete_file++; });
rocksdb::SyncPoint::GetInstance()->EnableProcessing();

Options options = CurrentOptions();
options.disable_auto_compactions = true;
Expand All @@ -464,10 +514,14 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {
auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());

DestroyAndReopen(options);
rocksdb::SyncPoint::GetInstance()->EnableProcessing();

WriteOptions wo;
wo.disableWAL = true;

// Create 4 files in L0
for (int i = 0; i < 4; i++) {
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo));
ASSERT_OK(Flush());
}
// We created 4 sst files in L0
Expand All @@ -483,7 +537,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipleDBPaths) {

// Create 4 files in L0
for (int i = 4; i < 8; i++) {
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B')));
ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo));
ASSERT_OK(Flush());
}
ASSERT_EQ("4,1", FilesPerLevel(0));
Expand Down Expand Up @@ -538,14 +592,27 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimitedDelete) {
// Close DB and destroy it using DeleteScheduler
Close();

int num_sst_files = 0;
int num_wal_files = 0;
std::vector<std::string> db_files;
env_->GetChildren(dbname_, &db_files);
for (std::string f : db_files) {
if (f.substr(f.find_last_of(".") + 1) == "sst") {
num_sst_files++;
} else if (f.substr(f.find_last_of(".") + 1) == "log") {
num_wal_files++;
}
}
ASSERT_GT(num_sst_files, 0);
ASSERT_GT(num_wal_files, 0);

auto sfm = static_cast<SstFileManagerImpl*>(options.sst_file_manager.get());

sfm->SetDeleteRateBytesPerSecond(1024 * 1024);
sfm->delete_scheduler()->SetMaxTrashDBRatio(1.1);
ASSERT_OK(DestroyDB(dbname_, options));
sfm->WaitForEmptyTrash();
// We have deleted the 4 sst files in the delete_scheduler
ASSERT_EQ(bg_delete_file, 4);
ASSERT_EQ(bg_delete_file, num_sst_files + num_wal_files);
}

TEST_F(DBSSTTest, DBWithMaxSpaceAllowed) {
Expand Down
8 changes: 5 additions & 3 deletions db/wal_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "util/cast_util.h"
#include "util/coding.h"
#include "util/file_reader_writer.h"
#include "util/file_util.h"
#include "util/filename.h"
#include "util/logging.h"
#include "util/mutexlock.h"
Expand Down Expand Up @@ -190,7 +191,7 @@ void WalManager::PurgeObsoleteWALFiles() {
continue;
}
if (now_seconds - file_m_time > db_options_.wal_ttl_seconds) {
s = env_->DeleteFile(file_path);
s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, "Can't delete file: %s: %s",
file_path.c_str(), s.ToString().c_str());
Expand All @@ -216,7 +217,7 @@ void WalManager::PurgeObsoleteWALFiles() {
log_file_size = std::max(log_file_size, file_size);
++log_files_num;
} else {
s = env_->DeleteFile(file_path);
s = DeleteDBFile(&db_options_, file_path, archival_dir, false);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log,
"Unable to delete file: %s: %s", file_path.c_str(),
Expand Down Expand Up @@ -255,7 +256,8 @@ void WalManager::PurgeObsoleteWALFiles() {

for (size_t i = 0; i < files_del_num; ++i) {
std::string const file_path = archived_logs[i]->PathName();
s = env_->DeleteFile(db_options_.wal_dir + "/" + file_path);
s = DeleteDBFile(&db_options_, db_options_.wal_dir + "/" + file_path,
db_options_.wal_dir, false);
if (!s.ok()) {
ROCKS_LOG_WARN(db_options_.info_log, "Unable to delete file: %s: %s",
file_path.c_str(), s.ToString().c_str());
Expand Down
3 changes: 3 additions & 0 deletions include/rocksdb/sst_file_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@ class SstFileManager {

// Create a new SstFileManager that can be shared among multiple RocksDB
// instances to track SST file and control there deletion rate.
// Even though SstFileManager don't track WAL files but it still control
// there deletion rate.
//
// @param env: Pointer to Env object, please see "rocksdb/env.h".
// @param info_log: If not nullptr, info_log will be used to log errors.
Expand All @@ -93,6 +95,7 @@ class SstFileManager {
// this value is set to 1024 (1 Kb / sec) and we deleted a file of size 4 Kb
// in 1 second, we will wait for another 3 seconds before we delete other
// files, Set to 0 to disable deletion rate limiting.
// This option also affect the delete rate of WAL files in the DB.
// @param delete_existing_trash: Deprecated, this argument have no effect, but
// if user provide trash_dir we will schedule deletes for files in the dir
// @param status: If not nullptr, status will contain any errors that happened
Expand Down
8 changes: 2 additions & 6 deletions util/file_util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,11 @@ Status CreateFile(Env* env, const std::string& destination,
return dest_writer->Sync(use_fsync);
}

Status DeleteSSTFile(const ImmutableDBOptions* db_options,
const std::string& fname, const std::string& dir_to_sync) {
return DeleteDBFile(db_options, fname, dir_to_sync, false);
}

Status DeleteDBFile(const ImmutableDBOptions* db_options,
const std::string& fname, const std::string& dir_to_sync,
const bool force_bg) {
#ifndef ROCKSDB_LITE
auto sfm =
SstFileManagerImpl* sfm =
static_cast<SstFileManagerImpl*>(db_options->sst_file_manager.get());
if (sfm) {
return sfm->ScheduleFileDeletion(fname, dir_to_sync, force_bg);
Expand All @@ -107,6 +102,7 @@ Status DeleteDBFile(const ImmutableDBOptions* db_options,
(void)dir_to_sync;
(void)force_bg;
// SstFileManager is not supported in ROCKSDB_LITE
// Delete file immediately
return db_options->env->DeleteFile(fname);
#endif
}
Expand Down
11 changes: 4 additions & 7 deletions util/file_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "rocksdb/env.h"
#include "rocksdb/status.h"
#include "rocksdb/types.h"
#include "util/filename.h"

namespace rocksdb {
// use_fsync maps to options.use_fsync, which determines the way that
Expand All @@ -21,13 +22,9 @@ extern Status CopyFile(Env* env, const std::string& source,
extern Status CreateFile(Env* env, const std::string& destination,
const std::string& contents, bool use_fsync);

extern Status DeleteSSTFile(const ImmutableDBOptions* db_options,
const std::string& fname,
const std::string& path_to_sync);

extern Status DeleteDBFile(const ImmutableDBOptions* db_options,
const std::string& fname,
const std::string& path_to_sync,
const bool force_bg);
const std::string& fname,
const std::string& path_to_sync,
const bool force_bg = false);

} // namespace rocksdb
9 changes: 7 additions & 2 deletions utilities/blob_db/blob_db_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -812,7 +812,11 @@ TEST_F(BlobDBTest, SstFileManager) {
Destroy();
// Make sure that DestroyBlobDB() also goes through delete scheduler.
ASSERT_GE(files_scheduled_to_delete, 2);
ASSERT_EQ(0, files_deleted_directly);
// Due to a timing issue, the WAL may or may not be deleted directly. The
// blob file is first scheduled, followed by WAL. If the background trash
// thread does not wake up on time, the WAL file will be directly
// deleted as the trash size will be > DB size
ASSERT_LE(files_deleted_directly, 1);
SyncPoint::GetInstance()->DisableProcessing();
sfm->WaitForEmptyTrash();
}
Expand Down Expand Up @@ -855,7 +859,8 @@ TEST_F(BlobDBTest, SstFileManagerRestart) {
// Make sure that reopening the DB rescan the existing trash files
Open(bdb_options, db_options);
ASSERT_GE(files_scheduled_to_delete, 3);
ASSERT_EQ(0, files_deleted_directly);
// Depending on timing, the WAL file may or may not be directly deleted
ASSERT_LE(files_deleted_directly, 1);

sfm->WaitForEmptyTrash();

Expand Down

0 comments on commit dae3b55

Please sign in to comment.