Skip to content

Commit

Permalink
host_exerciser updates (#2355)
Browse files Browse the repository at this point in the history
host_exerciser updates

- Read the AFU frequency from a new CSR.
- Add tests for PCIe atomic functions, including validating that each function
  is performed properly.
- Fill the source buffer with random data instead of the same pattern over
  and over so that read errors are more likely to be found.
- Make the log level work. Dump a few lines of the buffers in some modes.
  • Loading branch information
michael-adler committed Nov 2, 2021
1 parent 0d4323a commit d80afb6
Show file tree
Hide file tree
Showing 2 changed files with 209 additions and 15 deletions.
47 changes: 42 additions & 5 deletions samples/host_exerciser/host_exerciser.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ enum {
HE_STATUS0 = 0x0160,
HE_STATUS1 = 0x0168,
HE_ERROR = 0x0170,
HE_STRIDE = 0x0178
HE_STRIDE = 0x0178,
HE_INFO0 = 0x0180,
};

//configures test mode
Expand All @@ -87,6 +88,19 @@ typedef enum {
HOSTEXE_CLS_8 = 0x3,
} hostexe_req_len;

// Encodings that configure atomic transactions.
//   bit  0     : 1 when atomic function mode is enabled
//   bit  1     : request size (0 = 4 bytes, 1 = 8 bytes)
//   bits [3:2] : function (0 = fetch-add, 1 = swap, 2 = compare-and-swap)
typedef enum {
    HOSTEXE_ATOMIC_OFF    = 0x0,
    HOSTEXE_ATOMIC_FADD_4 = (0x0 << 2) | (0x0 << 1) | 0x1,
    HOSTEXE_ATOMIC_FADD_8 = (0x0 << 2) | (0x1 << 1) | 0x1,
    HOSTEXE_ATOMIC_SWAP_4 = (0x1 << 2) | (0x0 << 1) | 0x1,
    HOSTEXE_ATOMIC_SWAP_8 = (0x1 << 2) | (0x1 << 1) | 0x1,
    HOSTEXE_ATOMIC_CAS_4  = (0x2 << 2) | (0x0 << 1) | 0x1,
    HOSTEXE_ATOMIC_CAS_8  = (0x2 << 2) | (0x1 << 1) | 0x1,
} atomic_func;

//he test type
typedef enum {
Expand Down Expand Up @@ -173,7 +187,8 @@ union he_cfg {
uint64_t Continuous : 1;
uint64_t TestMode : 3;
uint64_t ReqLen : 2;
uint64_t Rsvd_19_7 : 13;
uint64_t AtomicFunc : 5;
uint64_t Rsvd_19_12 : 8;
uint64_t TputInterleave : 3;
uint64_t TestCfg : 5;
uint64_t IntrOnErr : 1;
Expand Down Expand Up @@ -296,6 +311,16 @@ const std::map<std::string, uint32_t> he_req_cls_len= {
{ "cl_8", HOSTEXE_CLS_8},
};

const std::map<std::string, uint32_t> he_req_atomic_func = {
{ "off", HOSTEXE_ATOMIC_OFF},
{ "fadd_4", HOSTEXE_ATOMIC_FADD_4},
{ "fadd_8", HOSTEXE_ATOMIC_FADD_8},
{ "swap_4", HOSTEXE_ATOMIC_SWAP_4},
{ "swap_8", HOSTEXE_ATOMIC_SWAP_8},
{ "cas_4", HOSTEXE_ATOMIC_CAS_4},
{ "cas_8", HOSTEXE_ATOMIC_CAS_8},
};

const std::map<std::string, uint32_t> he_test_mode = {
{ "test_rollover", HOSTEXE_TEST_ROLLOVER},
{ "test_termination", HOSTEXE_TEST_TERMINATION}
Expand Down Expand Up @@ -331,6 +356,10 @@ class host_exerciser : public test_afu {
// Configures test rollover or test termination
app_.add_option("--continuousmode", he_continuousmode_, "test rollover or test termination")->default_val("false");

// Atomic function
app_.add_option("--atomic", he_req_atomic_func_, "atomic requests (only permitted in combination with lpbk/cl_1)")
->transform(CLI::CheckedTransformer(he_req_atomic_func))->default_val("off");

// Delay
app_.add_option("-d,--delay", he_delay_, "Enables random delay insertion between requests")->default_val("false");

Expand All @@ -346,12 +375,14 @@ class host_exerciser : public test_afu {
app_.add_option("--contmodetime", he_contmodetime_,
"Continuous mode time in seconds")->default_val("0");

app_.add_option("--clock-mhz", he_clock_mhz_,
"Clock frequency (MHz) -- when zero, read the frequency from the AFU")->default_val("0");
}

virtual int run(CLI::App *app, test_command::ptr_t test) override
{
int res = exit_codes::not_run;
logger_->set_level(spdlog::level::trace);
logger_->set_level(spdlog::level::from_str(log_level_));
logger_->info("starting test run, count of {0:d}", count_);
uint32_t count = 0;
try {
Expand Down Expand Up @@ -404,9 +435,9 @@ class host_exerciser : public test_afu {
{
std::random_device rd;
std::mt19937 mt(rd());
std::uniform_int_distribution<uint32_t> dist(1, 4096);
std::uniform_int_distribution<uint32_t> dist(1, -1);
auto sz = sizeof(uint32_t);
for (uint32_t i = 0; i < buffer->size()/sz; i+=sz){
for (uint32_t i = 0; i < buffer->size(); i+=sz){
buffer->write<uint32_t>(dist(mt), i);
}

Expand Down Expand Up @@ -466,11 +497,13 @@ class host_exerciser : public test_afu {
uint32_t count_;
uint32_t he_modes_;
uint32_t he_req_cls_len_;
uint32_t he_req_atomic_func_;
bool he_delay_;
bool he_continuousmode_;
uint32_t he_interleave_;
uint32_t he_interrupt_;
uint32_t he_contmodetime_;
uint32_t he_clock_mhz_;

std::map<uint32_t, uint32_t> limits_;

Expand All @@ -489,6 +522,10 @@ class host_exerciser : public test_afu {
return handle_->get_token();
}

// Ask this object's logger whether `level` is enabled; the answer is
// whatever logger_->should_log() reports.
bool should_log(spdlog::level::level_enum level)
{
    const bool enabled = logger_->should_log(level);
    return enabled;
}
};
} // end of namespace host_exerciser

177 changes: 167 additions & 10 deletions samples/host_exerciser/host_exerciser_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ class host_exerciser_cmd : public test_command

// print bandwidth
if (dsm_status->num_ticks > 0) {
double perf_data = (double)(num_cache_lines * 64) /
(2.85 * (dsm_status->num_ticks));
double perf_data = (double)(num_cache_lines * 64) /
((1000.0 / host_exe_->he_clock_mhz_ * (dsm_status->num_ticks)));
std::cout << "Bandwidth: " << std::setprecision(3) <<
perf_data << " GB/s" << std::endl;
}
Expand Down Expand Up @@ -184,19 +184,137 @@ class host_exerciser_cmd : public test_command
he_lpbk_ctl_.ResetL = 1;
he_lpbk_ctl_.ForcedTestCmpl = 1;
host_exe_->write32(HE_CTL, he_lpbk_ctl_.value);
// sleep for 1 seconds to gracefully exit
sleep(1);

if (! he_wait_test_completion())
sleep(1);

he_lpbk_ctl_.value = 0;
host_exe_->write32(HE_CTL, he_lpbk_ctl_.value);
usleep(1000);
}

// Prepare the source buffer for a test run.
//
// The buffer is first filled by host_exe_->fill(). For the compare-and-swap
// modes every third line is then seeded with its own line index, and for
// any atomic mode a hashed copy of each line's first 64-bit word is stored
// in the line's second 64-bit word.
void he_init_src_buffer(shared_buffer::ptr_t buffer)
{
    host_exe_->fill(buffer);

    const auto func = he_lpbk_cfg_.AtomicFunc;
    const auto line_count = buffer->size() / CL;

    // Compare and swap: seed some lines with a value that will match.
    // The value used is the line index.
    const bool cas_4 = (func == HOSTEXE_ATOMIC_CAS_4);
    const bool cas_8 = (func == HOSTEXE_ATOMIC_CAS_8);
    if (cas_4 || cas_8) {
        for (uint32_t line = 0; line < line_count; line += 3) {
            if (cas_4)
                buffer->write<uint32_t>(line, line*CL);
            else
                buffer->write<uint64_t>(line, line*CL);
        }
    }

    // Nothing more to do unless an atomic function is selected.
    if (func == HOSTEXE_ATOMIC_OFF)
        return;

    // In atomic mode, at most the first 8 bytes of each line will be
    // updated and copied. Record a function of that first word in the
    // second word of the line so it can be used as a check later.
    const uint64_t hash_mask = 0xabababababababab;
    for (uint32_t line = 0; line < line_count; ++line) {
        const uint64_t first_word = buffer->read<uint64_t>(line*CL);
        buffer->write<uint64_t>(first_word ^ hash_mask, line*CL + 8);
    }
}

// Print the start of a buffer to std::cout for debugging.
//
// Output is the heading `msg` followed by up to 8 lines of CL bytes each.
// Every line is shown as eight zero-padded 64-bit hex words, highest
// offset first, and the dump ends with a " ..." marker.
//
// @param buffer  buffer to dump; fewer than 8 lines are printed when the
//                buffer holds fewer than 8 full lines
// @param msg     heading printed before the dump
void he_dump_buffer(shared_buffer::ptr_t buffer, const char* msg)
{
    constexpr uint64_t max_lines = 8;       // lines shown per dump
    constexpr uint64_t words_per_line = 8;  // 64-bit words shown per line

    std::cout << msg << ":" << std::endl;

    // Never read past the end of a buffer shorter than max_lines.
    const uint64_t avail_lines = buffer->size() / CL;
    const uint64_t num_lines = (avail_lines < max_lines) ? avail_lines : max_lines;

    // setfill() is sticky on the stream; remember the caller's fill
    // character so it can be put back after the dump.
    const char saved_fill = std::cout.fill();

    for (uint64_t i = 0; i < num_lines; i++)
    {
        std::cout << std::hex;
        // Highest word first so each line reads as one wide value.
        for (uint64_t w = words_per_line; w-- > 0; )
        {
            std::cout << " " << std::setfill('0') << std::setw(16)
                      << buffer->read<uint64_t>(i*CL + 8*w);
        }
        std::cout << std::dec << std::endl;
    }

    std::cout.fill(saved_fill);

    std::cout << " ..." << std::endl;
}

// Validate the buffers after a test run.
//
// Only loopback test mode is checked. With atomics off, the source and
// destination buffers are handed to host_exe_->compare(). With an atomic
// function selected, each line is checked individually:
//   - the first 64-bit word of the source line must equal the value the
//     selected function is expected to produce from the original value and
//     the line index;
//   - the first word of the destination line must equal the original
//     source value (low 4 bytes only for 4-byte requests).
// The original value is recovered from the second 64-bit word of the
// source line, which holds it XORed with a constant.
//
// @throws std::runtime_error on the first atomic-mode mismatch
void he_compare_buffer()
{
    host_exerciser_swtestmsg();

    // Compare buffer contents only in loopback test mode
    if (he_lpbk_cfg_.TestMode != HOST_EXEMODE_LPBK1)
        return;

    // Normal (non-atomic) is a simple comparison
    if (he_lpbk_cfg_.AtomicFunc == HOSTEXE_ATOMIC_OFF) {
        host_exe_->compare(source_, destination_);
        return;
    }

    // Atomic mode is a far more complicated comparison. The source buffer
    // is modified and some of the original state is copied to the destination.
    const bool size_is_4 = ((he_lpbk_cfg_.AtomicFunc & 2) == 0);
    const bool is_fetch_add = (he_lpbk_cfg_.AtomicFunc & 0xc) == 0;
    const bool is_swap = (he_lpbk_cfg_.AtomicFunc & 0xc) == 4;
    const bool is_cas = (he_lpbk_cfg_.AtomicFunc & 0xc) == 8;

    const uint64_t lo32 = 0xffffffff;
    const uint64_t hi32 = 0xffffffff00000000;
    const uint64_t hash_mask = 0xabababababababab;

    // Ignore the last entry to work around an off by one copy error.
    // Guard the subtraction so an empty buffer does not wrap around.
    const uint64_t num_lines = source_->size() / CL;
    const uint64_t num_checked = (num_lines > 0) ? num_lines - 1 : 0;

    for (uint64_t i = 0; i < num_checked; i += 1) {
        // In source_, the first entry in every line is the atomically modified
        // value. The second entry holds the original value, hashed with a
        // constant so it isn't a simple repetition.
        const uint64_t src_a = source_->read<uint64_t>(i*CL);
        // Read the original value and reverse the hash.
        uint64_t src_b = source_->read<uint64_t>(i*CL + 8) ^ hash_mask;
        uint64_t upd_a = 0;

        // Compute expected value
        if (is_fetch_add) {
            // Hardware added the line index (i) to each line, either preserving
            // the high 4 bytes or modifying all 8 bytes.
            if (size_is_4)
                upd_a = (src_b & hi32) | ((src_b + i) & lo32);
            else
                upd_a = src_b + i;
        }
        if (is_swap) {
            // Hardware swapped the line index (i) into each line
            if (size_is_4)
                upd_a = (src_b & hi32) | (i & lo32);
            else
                upd_a = i;
        }
        if (is_cas) {
            // Hardware swapped the bit inverse of line index (i) in each line
            // when the original value is the line index.
            if (size_is_4)
                upd_a = ((src_b & lo32) == i) ? (src_b & hi32) | (~i & lo32) : src_b;
            else
                upd_a = (src_b == i) ? ~i : src_b;
        }

        if (upd_a != src_a)
            throw std::runtime_error("Atomic update error");

        // The destination is comparatively easy. For all functions it is
        // simply the original source value.
        uint64_t dst_a = destination_->read<uint64_t>(i*CL);
        if (size_is_4) {
            src_b &= lo32;
            dst_a &= lo32;
        }

        if (dst_a != src_b)
            throw std::runtime_error("Atomic read error or write error");
    }
}

bool he_continuousmode()
Expand Down Expand Up @@ -249,9 +367,18 @@ class host_exerciser_cmd : public test_command
he_lpbk_cfg_.IntrTestMode = 1;
}

// Atomic functions
he_lpbk_cfg_.AtomicFunc = host_exe_->he_req_atomic_func_;
if (he_lpbk_cfg_.AtomicFunc != HOSTEXE_ATOMIC_OFF) {
if (he_lpbk_cfg_.ReqLen != HOSTEXE_CLS_1) {
std::cerr << "Atomic function mode requires cl_1" << std::endl;
return -1;
}
}

if (host_exe_->he_continuousmode_ &&
(he_lpbk_cfg_.IntrTestMode == 1)) {
std::cerr << "Interrupts doesn't support in Continuous mode"
std::cerr << "Interrupts not supported in continuous mode"
<< std::endl;
return -1;
}
Expand All @@ -278,11 +405,26 @@ class host_exerciser_cmd : public test_command

auto ret = parse_input_options();
if (ret != 0) {
std::cerr << "Failed to parese input options" << std::endl;
std::cerr << "Failed to parse input options" << std::endl;
return ret;
}
std::cout << "Input Config:" << he_lpbk_cfg_.value << std::endl;

if (0 == host_exe_->he_clock_mhz_) {
// Does the AFU record its clock info?
uint16_t freq = host_exe_->read64(HE_INFO0);
if (freq) {
host_exe_->he_clock_mhz_ = freq;
std::cout << "AFU clock: "
<< host_exe_->he_clock_mhz_ << " MHz" << std::endl;
}
else {
host_exe_->he_clock_mhz_ = 350;
std::cout << "Frequency of AFU clock unknown. Assuming "
<< host_exe_->he_clock_mhz_ << " MHz." << std::endl;
}
}

// assert reset he-lpbk
he_lpbk_ctl_.value = 0;
d_afu->write32(HE_CTL, he_lpbk_ctl_.value);
Expand All @@ -300,7 +442,7 @@ class host_exerciser_cmd : public test_command
std::cout << "Allocate SRC Buffer" << std::endl;
source_ = d_afu->allocate(LPBK1_BUFFER_ALLOCATION_SIZE);
d_afu->write64(HE_SRC_ADDR, cacheline_aligned_addr(source_->io_address()));
std::fill_n(source_->c_type(), LPBK1_BUFFER_SIZE, 0xAF);
he_init_src_buffer(source_);

/* Allocate Destination Buffer
Write to CSR_DST_ADDR */
Expand All @@ -321,7 +463,7 @@ class host_exerciser_cmd : public test_command
// Number of cache lines
d_afu->write64(HE_NUM_LINES, (LPBK1_BUFFER_SIZE / (1 * CL)) -1);

// Write to CSR_CFG
// Write to CSR_CFG
d_afu->write64(HE_CFG, he_lpbk_cfg_.value);

event::ptr_t ev = nullptr;
Expand All @@ -332,6 +474,13 @@ class host_exerciser_cmd : public test_command
std::cout << "Using Interrupts\n";
}

if (host_exe_->should_log(spdlog::level::debug)) {
std::cout << std::endl;
he_dump_buffer(source_, "Pre-execution source");
he_dump_buffer(destination_, "Pre-execution destination");
std::cout << std::endl;
}

// Write to CSR_CTL
std::cout << "Start Test" << std::endl;
he_lpbk_ctl_.value = 0;
Expand All @@ -355,6 +504,14 @@ class host_exerciser_cmd : public test_command
if (!he_wait_test_completion()) {
return -1;
}

if (host_exe_->should_log(spdlog::level::debug)) {
std::cout << std::endl;
he_dump_buffer(source_, "Post-execution source");
he_dump_buffer(destination_, "Post-execution destination");
std::cout << std::endl;
}

he_compare_buffer();
he_perf_counters();
}
Expand Down

0 comments on commit d80afb6

Please sign in to comment.