Skip to content

Commit

Permalink
env: Add Zoned Namespace SSD support on TerarkDB (#129)
Browse files Browse the repository at this point in the history
* env: Add Zoned Namespace SSD support on TerarkDB

Support Zoned Namespace (ZNS) SSDs by porting and adapting ZenFS,
originally written by Hans Holmberg of Western Digital.

https://github.com/westerndigitalcorporation/zenfs

Since TerarkDB is based on RocksDB v5.18.3, we also made some adaptations
to the codebase.

In this version, `zenfs` is not yet fully functional, but the basic functions and utilities are in place for the next stage of integration.

Signed-off-by: Changlong Chen <chenchanglong@bytedance.com>
Signed-off-by: Kuankuan Guo <guokuankuan@bytedance.com>

Co-authored-by: Hans Holmberg <hans.holmberg@wdc.com>
Co-authored-by: Yuanliang Wang <wangyuanliang@bytedance.com>
Co-authored-by: Changlong Chen <chenchanglong@bytedance.com>
  • Loading branch information
4 people authored and mm304321141 committed Sep 17, 2021
1 parent a24bf87 commit 6eecaa2
Show file tree
Hide file tree
Showing 15 changed files with 563 additions and 7 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ build/

ldb
manifest_dump
zenfs
sst_dump
blob_dump
column_aware_encoding_exp
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,6 @@
[submodule "third-party/gflags"]
path = third-party/gflags
url = https://github.com/gflags/gflags.git
[submodule "third-party/zenfs"]
path = third-party/zenfs
url = https://github.com/bzbd/zenfs.git
25 changes: 24 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ include(CMakeDependentOption)
CMAKE_DEPENDENT_OPTION(WITH_TESTS "build with tests" ON "CMAKE_BUILD_TYPE STREQUAL Debug" OFF)
option(WITH_TOOLS "build with tools" OFF)
option(WITH_TERARK_ZIP "build with TerarkZipTable support" ON)
option(WITH_ZENFS "build with experimental zenfs" OFF)
option(WITH_DIAGNOSE_CACHE "build with diagnosable cache support" OFF)
option(WITH_BOOSTLIB "build with boost, if WITH_TERARK_ZIP is ON, this will also set to ON" OFF)
option(WITH_TERARKDB_NAMESPACE "namespace" "terarkdb")
Expand All @@ -65,6 +66,10 @@ if (WITH_WINDOWS_UTF8_FILENAMES)
add_definitions(-DROCKSDB_WINDOWS_UTF8_FILENAMES)
endif()

if(WITH_ZENFS)
add_compile_definitions(ROCKSDB_NAMESPACE=${WITH_TERARKDB_NAMESPACE})
endif()

IF(FORCE_TERARKDB_RELEASE_BUILD)
SET(CMAKE_BUILD_TYPE "Release")
ENDIF()
Expand Down Expand Up @@ -374,6 +379,10 @@ if(WITH_TBB)
list(APPEND THIRDPARTY_LIBS ${TBB_LIBRARIES})
endif()

if(WITH_ZENFS)
list(APPEND THIRDPARTY_LIBS zbd)
endif()

# Stall notifications eat some performance from inserts
option(DISABLE_STALL_NOTIF "Build with stall notifications" OFF)
if(DISABLE_STALL_NOTIF)
Expand Down Expand Up @@ -827,6 +836,15 @@ if(WIN32)
port/port_posix.cc
env/env_posix.cc
env/io_posix.cc)

if(WITH_ZENFS)
list(APPEND SOURCES
third-party/zenfs/fs/fs_zenfs.cc
third-party/zenfs/fs/io_zenfs.cc
third-party/zenfs/fs/zbd_zenfs.cc
third-party/zenfs/util/zenfs.cc
tools/zenfs_tool.cc)
endif()
endif()

set(ROCKSDB_STATIC_LIB terarkdb${ARTIFACT_SUFFIX})
Expand Down Expand Up @@ -919,7 +937,7 @@ if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDOWS)
# We should also expose terarkdb's source dir
INSTALL(DIRECTORY memtable monitoring env db cache options port table util utilities
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
FILES_MATCHING
FILES_MATCHING
PATTERN "*.h"
PATTERN "*.hpp"
)
Expand Down Expand Up @@ -1184,6 +1202,11 @@ if(WITH_TOOLS)
$<TARGET_OBJECTS:testharness>)
target_link_libraries(db_bench${ARTIFACT_SUFFIX} gtest ${ROCKSDB_STATIC_LIB})

# Standalone `zenfs` command-line tool: tools/zenfs.cc supplies main() and
# tools/zenfs_tool.cc is compiled in alongside it. Linked against gtest and
# the static TerarkDB library, like the db_bench target.
# NOTE(review): no WITH_ZENFS guard is visible around this target in this
# hunk, and tools/zenfs_tool.cc is also appended to SOURCES when WITH_ZENFS is
# ON - confirm this target still builds with WITH_ZENFS=OFF and that the
# duplicate compilation does not cause duplicate-symbol link errors.
add_executable(zenfs${ARTIFACT_SUFFIX} tools/zenfs.cc
tools/zenfs_tool.cc
$<TARGET_OBJECTS:testharness>)
target_link_libraries(zenfs${ARTIFACT_SUFFIX} gtest ${ROCKSDB_STATIC_LIB})

if(WITH_TERARK_ZIP)
add_subdirectory(terark-tools/terark-test)
endif()
Expand Down
1 change: 1 addition & 0 deletions bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ def bench(records, key_size, value_size, engine, db_dir, exist_db):
""" % BENCH_ARGS[0]
cmd = """
{db_bench} \
--fs_uri=/dev/nvme3n2
--benchmarks={bench_type}
--use_existing_db={exist_db}
--sync=1
Expand Down
13 changes: 13 additions & 0 deletions build_tools/build_detect_platform
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,19 @@ EOF
fi
fi

# Probe for libzbd unless ROCKSDB_DISABLE_LZBD is set to a non-empty value.
# The probe compiles a trivial program that includes <libzbd/zbd.h>; all
# compiler output is discarded and only the exit status is inspected.
# NOTE(review): the probe command does not pass -lzbd, so it only verifies
# that the header is present, not that the library can be linked - confirm
# this is intended.
if ! test $ROCKSDB_DISABLE_LZBD; then
# Test whether libzbd is installed
$CXX $CFLAGS $COMMON_FLAGS -x c++ - -o /dev/null 2>/dev/null <<EOF
#include <libzbd/zbd.h>
int main() {}
EOF
if [ "$?" = 0 ]; then
# Probe succeeded: define LIBZBD for all compilation units and add -lzbd to
# both the platform and the Java linker flags.
COMMON_FLAGS="$COMMON_FLAGS -DLIBZBD"
PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -lzbd"
JAVA_LDFLAGS="$JAVA_LDFLAGS -lzbd"
fi
fi

if ! test $ROCKSDB_DISABLE_NUMA; then
# Test whether numa is available
$CXX $CFLAGS -x c++ - -o /dev/null -lnuma 2>/dev/null <<EOF
Expand Down
37 changes: 37 additions & 0 deletions env/io_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1115,4 +1115,41 @@ Status PosixDirectory::Fsync() {
return Status::OK();
}
} // namespace TERARKDB_NAMESPACE

#ifdef ZENFS_READY
// Registers a FileSystem factory for URIs matching "zenfs://.*".
//
// After the "zenfs://" scheme is stripped, two forms are accepted:
//   dev:<id>   - <id> is passed straight to NewZenFS().
//   uuid:<id>  - <id> is looked up in the map returned by
//                ListZenFileSystems() and the mapped value is passed to
//                NewZenFS().
// Anything else sets *errmsg to "Malformed URI".
//
// On any failure *errmsg receives a description. In every case *f takes
// ownership of whatever pointer NewZenFS() produced (nullptr if it was never
// called) and the raw pointer is returned.
static FactoryFunc<FileSystem> zenfs_filesystem_reg =
    ObjectLibrary::Default()->Register<FileSystem>(
        "zenfs://.*", [](const std::string& uri, std::unique_ptr<FileSystem>* f,
                         std::string* errmsg) {
          std::string devID = uri;
          FileSystem* fs = nullptr;
          Status s;

          devID.replace(0, strlen("zenfs://"), "");
          // rfind(prefix, 0) can only match at offset 0, which makes it a
          // true "starts with" test. Plain rfind(prefix) returns the LAST
          // occurrence, so an id that contained the token again later
          // (e.g. "dev:my-dev:0") was wrongly rejected as malformed.
          if (devID.rfind("dev:", 0) == 0) {
            devID.replace(0, strlen("dev:"), "");
            s = NewZenFS(&fs, devID);
            if (!s.ok()) {
              *errmsg = s.ToString();
            }
          } else if (devID.rfind("uuid:", 0) == 0) {
            std::map<std::string, std::string> zenFileSystems =
                ListZenFileSystems();
            devID.replace(0, strlen("uuid:"), "");

            // Single lookup: reuse the iterator instead of find() followed
            // by operator[].
            auto found = zenFileSystems.find(devID);
            if (found == zenFileSystems.end()) {
              *errmsg = "UUID not found";
            } else {
              s = NewZenFS(&fs, found->second);
              if (!s.ok()) {
                *errmsg = s.ToString();
              }
            }
          } else {
            *errmsg = "Malformed URI";
          }
          f->reset(fs);
          return f->get();
        });
#endif
#endif
4 changes: 4 additions & 0 deletions include/rocksdb/env.h
Original file line number Diff line number Diff line change
Expand Up @@ -1423,4 +1423,8 @@ Env* NewTimedEnv(Env* base_env);
// This is a env forwarding method defined in env/env_io_prof.cc
Env* NewIOProfEnv(Env* base_env);

// Factory for an Env backed by ZenFS (zoned block device support).
// The result is reported through the returned Status; the new Env is
// presumably stored in *env on success - TODO confirm against the
// implementation. `bdevname` presumably identifies the zoned block device to
// open - verify against callers.
// NOTE(review): an earlier comment placed the ZenFS factory in
// hdfs/env_zenfs.h, but tools/db_bench_tool.cc includes "env/env_zenfs.h" -
// confirm which header actually accompanies the implementation.
Status NewZenEnv(Env** env, const std::string& bdevname);

} // namespace TERARKDB_NAMESPACE
5 changes: 5 additions & 0 deletions include/rocksdb/terark_namespace.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,8 @@
#cmakedefine WITH_TERARK_ZIP
#cmakedefine WITH_BOOSTLIB
#cmakedefine WITH_DIAGNOSE_CACHE
#cmakedefine WITH_ZENFS

#ifdef WITH_ZENFS
#define LIBZBD
#endif
3 changes: 3 additions & 0 deletions src.mk
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,9 @@ MOCK_LIB_SOURCES = \
BENCH_LIB_SOURCES = \
tools/db_bench_tool.cc \

ZENFS_LIB_SOURCES = \
tools/zenfs_tool.cc \

EXP_LIB_SOURCES = \
utilities/col_buf_decoder.cc \
utilities/col_buf_encoder.cc \
Expand Down
12 changes: 7 additions & 5 deletions table/terark_zip_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -488,11 +488,13 @@ Status TerarkZipTableFactory::SanitizeOptions(
auto& tzto = *reinterpret_cast<const TerarkZipTableOptions*>(
table_factory->GetOptions());
try {
terark::TempFileDeleteOnClose test;
test.path = tzto.localTempDir + "/Terark-XXXXXX";
test.open_temp();
test.writer << "Terark";
test.complete_write();
if (tzto.terarkZipMinLevel != -2) {
terark::TempFileDeleteOnClose test;
test.path = tzto.localTempDir + "/Terark-XXXXXX";
test.open_temp();
test.writer << "Terark";
test.complete_write();
}
} catch (...) {
std::string msg = "ERROR: bad localTempDir : " + tzto.localTempDir;
fprintf(stderr, "%s\n", msg.c_str());
Expand Down
1 change: 1 addition & 0 deletions third-party/zenfs
Submodule zenfs added at 1bc2a1
26 changes: 25 additions & 1 deletion tools/db_bench_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@
#include <io.h> // open/close
#endif

#ifdef LIBZBD
#include "env/env_zenfs.h"
#include "env/zbd_zenfs.h"
#endif

using GFLAGS_NAMESPACE::ParseCommandLineFlags;
using GFLAGS_NAMESPACE::RegisterFlagValidator;
using GFLAGS_NAMESPACE::SetUsageMessage;
Expand Down Expand Up @@ -826,10 +831,14 @@ DEFINE_uint64(prepare_log_writer_num, 1, "");
DEFINE_string(env_uri, "",
"URI for registry Env lookup. Mutually exclusive"
" with --hdfs.");
DEFINE_string(fs_uri, "",
"URI for registry Filesystem lookup. Mutually exclusive"
" with --hdfs and --env_uri."
" Creates a default environment with the specified filesystem.");
#endif // ROCKSDB_LITE
DEFINE_string(hdfs, "",
"Name of hdfs environment. Mutually exclusive with"
" --env_uri.");
" --env_uri and --fs_uri");
static TERARKDB_NAMESPACE::Env* FLAGS_env = TERARKDB_NAMESPACE::Env::Default();

DEFINE_int64(stats_interval, 0,
Expand Down Expand Up @@ -5817,6 +5826,13 @@ int db_bench_tool(int argc, char** argv) {
StringToCompressionType(FLAGS_compression_type.c_str());

#ifndef ROCKSDB_LITE
int env_opts =
!FLAGS_hdfs.empty() + !FLAGS_env_uri.empty() + !FLAGS_fs_uri.empty();
if (env_opts > 1) {
fprintf(stderr,
"Error: --hdfs, --env_uri and --fs_uri are mutually exclusive\n");
exit(1);
}
std::unique_ptr<Env> custom_env_guard;
if (!FLAGS_hdfs.empty() && !FLAGS_env_uri.empty()) {
fprintf(stderr, "Cannot provide both --hdfs and --env_uri.\n");
Expand All @@ -5827,6 +5843,14 @@ int db_bench_tool(int argc, char** argv) {
fprintf(stderr, "No Env registered for URI: %s\n", FLAGS_env_uri.c_str());
exit(1);
}
} else if (!FLAGS_fs_uri.empty()) {
#ifdef WITH_ZENFS
Status s = NewZenEnv(&FLAGS_env, FLAGS_fs_uri);
if (!s.ok()) {
fprintf(stderr, "Error: %s\n", s.ToString().c_str());
exit(1);
}
#endif
}
#endif // ROCKSDB_LITE
if (!FLAGS_hdfs.empty()) {
Expand Down
32 changes: 32 additions & 0 deletions tools/zenfs.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
// Copyright (c) 2019-present, Western Digital Corporation
// Copyright (c) 2021-present, Bytedance Inc. All rights reserved.
// This source code is licensed under both the GPLv2 (found in the
// COPYING file in the root directory) and Apache 2.0 License
// (found in the LICENSE.Apache file in the root directory).

#include <cstdio>
// Entry point for the `zenfs` command-line tool. Exactly one main() is
// compiled, selected by the build configuration:
//   ROCKSDB_LITE defined        -> stub that reports the tool is unsupported
//   GFLAGS and LIBZBD defined   -> forwards to zenfs_tool()
//   GFLAGS defined, no LIBZBD   -> stub asking the user to install libzbd
//   GFLAGS not defined          -> stub asking the user to install gflags
// Every stub prints its message to stderr and exits with status 1.
#ifdef ROCKSDB_LITE
int main() {
  // fprintf requires the destination stream as its first argument; without
  // it this branch does not compile.
  fprintf(stderr, "Not supported in lite mode.\n");
  return 1;
}
#else
#ifdef GFLAGS
#ifdef LIBZBD
// Defined in another translation unit (presumably tools/zenfs_tool.cc).
int zenfs_tool(int argc, char** argv);
int main(int argc, char** argv) { return zenfs_tool(argc, argv); }
#else
int main() {
  fprintf(stderr, "Please install libzbd to run the zenfs tool\n");
  return 1;
}
#endif  // LIBZBD
#else
int main() {
  fprintf(stderr, "Please install gflags to run rocksdb tools\n");
  return 1;
}
#endif  // GFLAGS
#endif  // ROCKSDB_LITE

Loading

0 comments on commit 6eecaa2

Please sign in to comment.