Commit c571aad0 authored by Kenton Varda's avatar Kenton Varda

CatRank benchmark case.

parent 178b5fdd
...@@ -33,7 +33,7 @@ ...@@ -33,7 +33,7 @@
#include <stdexcept> #include <stdexcept>
#include <memory> #include <memory>
#include <thread> #include <thread>
#include <mutex> #include <algorithm>
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <semaphore.h> #include <semaphore.h>
...@@ -184,6 +184,125 @@ public: ...@@ -184,6 +184,125 @@ public:
} }
}; };
// =======================================================================================
// Test case: Cat Rank
//
// The server receives a list of candidate search results with scores. It promotes the ones that
// mention "cat" in their snippet and demotes the ones that mention "dog", sorts the results by
// descending score, and returns.
//
// The promotion multiplier is large enough that all the results mentioning "cat" but not "dog"
// should end up at the front of the list, which is how we verify the result.
// Filler vocabulary used to pad generated snippets. Every entry ends with a space so that entries
// can be appended back to back and still leave each word delimited by spaces.
static const char* WORDS[] = {
  "foo ",
  "bar ",
  "baz ",
  "qux ",
  "quux ",
  "corge ",
  "grault ",
  "garply ",
  "waldo ",
  "fred ",
  "plugh ",
  "xyzzy ",
  "thud "
};

// Number of entries in WORDS, derived from the array itself so the two cannot drift apart.
constexpr size_t WORDS_COUNT = sizeof(WORDS) / sizeof(*WORDS);
struct ScoredResult {
double score;
SearchResult::Reader result;
ScoredResult() = default;
ScoredResult(double score, SearchResult::Reader result): score(score), result(result) {}
inline bool operator<(const ScoredResult& other) const { return score > other.score; }
};
// Benchmark case "catrank" (Cap'n Proto flavour).
//
// The client builds a list of search results with pseudo-random URLs and snippets, the server
// re-scores and sorts them, and the client verifies the answer by counting how many boosted
// results lead the response.
class CatRankTestCase {
public:
  typedef SearchResultList Request;
  typedef SearchResultList Response;
  typedef int Expectation;

  // Fills `request` with rand() % 1000 results and returns how many of them mention "cat" but
  // not "dog" -- the number of results expected at the front of the ranked response.
  static int setupRequest(SearchResultList::Builder request) {
    int count = rand() % 1000;
    int goodCount = 0;

    auto list = request.initResults(count);

    for (int i = 0; i < count; i++) {
      SearchResult::Builder result = list[i];
      // Base scores run from 1000 downwards, so no unboosted score exceeds 1000.
      result.setScore(1000 - i);

      int urlSize = rand() % 100;
      static const char URL_PREFIX[] = "http://example.com/";
      // sizeof() counts the literal's NUL terminator. Exclude it so the text is exactly the
      // prefix plus `urlSize` random letters; sizing with sizeof() left one unwritten (zero)
      // byte at the end of every URL, which the protobuf twin of this test does not have.
      // NOTE(review): assumes the size passed to initUrl() is the text length without a
      // terminator -- confirm against the Text builder API.
      static constexpr size_t URL_PREFIX_LENGTH = sizeof(URL_PREFIX) - 1;

      auto url = result.initUrl(urlSize + URL_PREFIX_LENGTH);
      // memcpy rather than strcpy: copy only the prefix characters, never a terminator, so the
      // write stays inside the requested size even when urlSize is zero.
      memcpy(url.data(), URL_PREFIX, URL_PREFIX_LENGTH);
      char* pos = url.data() + URL_PREFIX_LENGTH;
      for (int j = 0; j < urlSize; j++) {
        *pos++ = 'a' + rand() % 26;
      }

      bool isCat = rand() % 8 == 0;
      bool isDog = rand() % 8 == 0;
      goodCount += isCat && !isDog;

      // The snippet starts with a space and every word ends with one, so "cat"/"dog" are always
      // surrounded by spaces and can be found by searching for " cat " / " dog ".
      std::string snippet;
      snippet.push_back(' ');

      int prefix = rand() % 20;
      for (int j = 0; j < prefix; j++) {
        snippet.append(WORDS[rand() % WORDS_COUNT]);
      }

      if (isCat) snippet.append("cat ");
      if (isDog) snippet.append("dog ");

      int suffix = rand() % 20;
      for (int j = 0; j < suffix; j++) {
        snippet.append(WORDS[rand() % WORDS_COUNT]);
      }

      result.setSnippet(snippet);
    }

    return goodCount;
  }

  // Re-scores every result in `request` (x10000 when the snippet contains " cat ", /10000 when
  // it contains " dog "), sorts by descending adjusted score, and writes the ranked list into
  // `response`.
  static inline void handleRequest(SearchResultList::Reader request,
                                   SearchResultList::Builder response) {
    std::vector<ScoredResult> scoredResults;

    for (auto result: request.getResults()) {
      double score = result.getScore();
      if (strstr(result.getSnippet().c_str(), " cat ") != nullptr) {
        score *= 10000;
      }
      if (strstr(result.getSnippet().c_str(), " dog ") != nullptr) {
        score /= 10000;
      }
      scoredResults.emplace_back(score, result);
    }

    // ScoredResult::operator< is inverted, so this sorts highest score first.
    std::sort(scoredResults.begin(), scoredResults.end());

    auto list = response.initResults(scoredResults.size());
    auto iter = list.begin();
    for (auto result: scoredResults) {
      iter->setScore(result.score);
      iter->setUrl(result.result.getUrl());
      iter->setSnippet(result.result.getSnippet());
      ++iter;
    }
  }

  // Counts the leading results whose score exceeds 1001 -- only cat-without-dog results can,
  // since base scores top out at 1000 -- and compares that count with `expectedGoodCount`.
  static inline bool checkResponse(SearchResultList::Reader response, int expectedGoodCount) {
    int goodCount = 0;
    for (auto result: response.getResults()) {
      if (result.getScore() > 1001) {
        ++goodCount;
      } else {
        break;
      }
    }

    return goodCount == expectedGoodCount;
  }
};
// ======================================================================================= // =======================================================================================
class CountingOutputStream: public FdOutputStream { class CountingOutputStream: public FdOutputStream {
...@@ -443,62 +562,75 @@ uint64_t passByPipe(Func&& clientFunc, uint64_t iters) { ...@@ -443,62 +562,75 @@ uint64_t passByPipe(Func&& clientFunc, uint64_t iters) {
} }
} }
template <typename ReuseStrategy, typename Compression> template <typename TestCase, typename ReuseStrategy, typename Compression>
uint64_t doBenchmark(const std::string& mode, uint64_t iters) { uint64_t doBenchmark(const std::string& mode, uint64_t iters) {
if (mode == "client") { if (mode == "client") {
return syncClient<ExpressionTestCase, ReuseStrategy, Compression>( return syncClient<TestCase, ReuseStrategy, Compression>(
STDIN_FILENO, STDOUT_FILENO, iters); STDIN_FILENO, STDOUT_FILENO, iters);
} else if (mode == "server") { } else if (mode == "server") {
return server<ExpressionTestCase, ReuseStrategy, Compression>( return server<TestCase, ReuseStrategy, Compression>(
STDIN_FILENO, STDOUT_FILENO, iters); STDIN_FILENO, STDOUT_FILENO, iters);
} else if (mode == "object") { } else if (mode == "object") {
return passByObject<ExpressionTestCase, ReuseStrategy, Compression>(iters); return passByObject<TestCase, ReuseStrategy, Compression>(iters);
} else if (mode == "bytes") { } else if (mode == "bytes") {
return passByBytes<ExpressionTestCase, ReuseStrategy, Compression>(iters); return passByBytes<TestCase, ReuseStrategy, Compression>(iters);
} else if (mode == "pipe") { } else if (mode == "pipe") {
return passByPipe<ExpressionTestCase, ReuseStrategy, Compression>( return passByPipe<TestCase, ReuseStrategy, Compression>(
syncClient<ExpressionTestCase, ReuseStrategy, Compression>, iters); syncClient<TestCase, ReuseStrategy, Compression>, iters);
} else if (mode == "pipe-async") { } else if (mode == "pipe-async") {
return passByPipe<ExpressionTestCase, ReuseStrategy, Compression>( return passByPipe<TestCase, ReuseStrategy, Compression>(
asyncClient<ExpressionTestCase, ReuseStrategy, Compression>, iters); asyncClient<TestCase, ReuseStrategy, Compression>, iters);
} else { } else {
std::cerr << "Unknown mode: " << mode << std::endl; std::cerr << "Unknown mode: " << mode << std::endl;
exit(1); exit(1);
} }
} }
template <typename Compression> template <typename TestCase, typename Compression>
uint64_t doBenchmark2(const std::string& mode, const std::string& reuse, uint64_t iters) { uint64_t doBenchmark2(const std::string& mode, const std::string& reuse, uint64_t iters) {
if (reuse == "reuse") { if (reuse == "reuse") {
return doBenchmark<UseScratch<Compression, 1024>, Compression>(mode, iters); return doBenchmark<TestCase, UseScratch<Compression, 1024>, Compression>(mode, iters);
} else if (reuse == "no-reuse") { } else if (reuse == "no-reuse") {
return doBenchmark<NoScratch<Compression>, Compression>(mode, iters); return doBenchmark<TestCase, NoScratch<Compression>, Compression>(mode, iters);
} else { } else {
std::cerr << "Unknown reuse mode: " << reuse << std::endl; std::cerr << "Unknown reuse mode: " << reuse << std::endl;
exit(1); exit(1);
} }
} }
// Outermost dispatch layer: picks the compression policy named by `compression` ("none" or
// "snappy") and forwards the remaining options to doBenchmark2. Any other name prints an error
// to stderr and terminates the process with exit status 1.
template <typename TestCase>
uint64_t doBenchmark3(const std::string& mode, const std::string& reuse,
                      const std::string& compression, uint64_t iters) {
  if (compression == "none") {
    return doBenchmark2<TestCase, Uncompressed>(mode, reuse, iters);
  }

  if (compression == "snappy") {
    return doBenchmark2<TestCase, SnappyCompressed>(mode, reuse, iters);
  }

  // Unrecognized compression name: nothing sensible to run.
  std::cerr << "Unknown compression mode: " << compression << std::endl;
  exit(1);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (argc != 5) { if (argc != 6) {
std::cerr << "USAGE: " << argv[0] << " MODE REUSE COMPRESSION ITERATION_COUNT" << std::endl; std::cerr << "USAGE: " << argv[0] << " MODE REUSE COMPRESSION ITERATION_COUNT" << std::endl;
return 1; return 1;
} }
uint64_t iters = strtoull(argv[4], nullptr, 0); uint64_t iters = strtoull(argv[5], nullptr, 0);
srand(123); srand(123);
std::cerr << "Doing " << iters << " iterations..." << std::endl; std::cerr << "Doing " << iters << " iterations..." << std::endl;
uint64_t throughput; uint64_t throughput;
std::string compression = argv[3]; std::string testcase = argv[1];
if (compression == "none") { if (testcase == "eval") {
throughput = doBenchmark2<Uncompressed>(argv[1], argv[2], iters); throughput = doBenchmark3<ExpressionTestCase>(argv[2], argv[3], argv[4], iters);
} else if (compression == "snappy") { } else if (testcase == "catrank") {
throughput = doBenchmark2<SnappyCompressed>(argv[1], argv[2], iters); throughput = doBenchmark3<CatRankTestCase>(argv[2], argv[3], argv[4], iters);
} else { } else {
std::cerr << "Unknown compression mode: " << compression << std::endl; std::cerr << "Unknown test case: " << testcase << std::endl;
return 1; return 1;
} }
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include <stdexcept> #include <stdexcept>
#include <memory> #include <memory>
#include <thread> #include <thread>
#include <mutex> #include <algorithm>
#include <sys/types.h> #include <sys/types.h>
#include <sys/wait.h> #include <sys/wait.h>
#include <semaphore.h> #include <semaphore.h>
...@@ -166,8 +166,6 @@ int32_t evaluateExpression(const Expression& exp) { ...@@ -166,8 +166,6 @@ int32_t evaluateExpression(const Expression& exp) {
class ExpressionTestCase { class ExpressionTestCase {
public: public:
~ExpressionTestCase() {}
typedef Expression Request; typedef Expression Request;
typedef EvaluationResult Response; typedef EvaluationResult Response;
typedef int32_t Expectation; typedef int32_t Expectation;
...@@ -183,6 +181,114 @@ public: ...@@ -183,6 +181,114 @@ public:
} }
}; };
// =======================================================================================
// Test case: Cat Rank
//
// The server receives a list of candidate search results with scores. It promotes the ones that
// mention "cat" in their snippet and demotes the ones that mention "dog", sorts the results by
// descending score, and returns.
//
// The promotion multiplier is large enough that all the results mentioning "cat" but not "dog"
// should end up at the front of the list, which is how we verify the result.
// Filler vocabulary used to pad generated snippets. Every entry ends with a space so that entries
// can be appended back to back and still leave each word delimited by spaces.
static const char* WORDS[] = {
  "foo ",
  "bar ",
  "baz ",
  "qux ",
  "quux ",
  "corge ",
  "grault ",
  "garply ",
  "waldo ",
  "fred ",
  "plugh ",
  "xyzzy ",
  "thud "
};

// Number of entries in WORDS, derived from the array itself so the two cannot drift apart.
constexpr size_t WORDS_COUNT = sizeof(WORDS) / sizeof(*WORDS);
// Pairs a pointer to a search result with its adjusted score so that a list of results can be
// ranked with a plain std::sort. The pointer is stored as given; this struct never deletes it.
struct ScoredResult {
  double score;
  const SearchResult* result;

  ScoredResult() = default;
  ScoredResult(double scoreValue, const SearchResult* source)
      : score(scoreValue), result(source) {}

  // Deliberately inverted: the element with the HIGHER score compares as "less", so sorting with
  // the default comparator produces descending score order.
  bool operator<(const ScoredResult& rhs) const { return rhs.score < score; }
};
class CatRankTestCase {
public:
typedef SearchResultList Request;
typedef SearchResultList Response;
typedef int Expectation;
static int setupRequest(SearchResultList* request) {
int count = rand() % 1000;
int goodCount = 0;
for (int i = 0; i < count; i++) {
SearchResult* result = request->add_result();
result->set_score(1000 - i);
result->set_url("http://example.com/");
std::string* url = result->mutable_url();
int urlSize = rand() % 100;
for (int j = 0; j < urlSize; j++) {
url->push_back('a' + rand() % 26);
}
bool isCat = rand() % 8 == 0;
bool isDog = rand() % 8 == 0;
goodCount += isCat && !isDog;
std::string* snippet = result->mutable_snippet();
snippet->push_back(' ');
int prefix = rand() % 20;
for (int j = 0; j < prefix; j++) {
snippet->append(WORDS[rand() % WORDS_COUNT]);
}
if (isCat) snippet->append("cat ");
if (isDog) snippet->append("dog ");
int suffix = rand() % 20;
for (int j = 0; j < suffix; j++) {
snippet->append(WORDS[rand() % WORDS_COUNT]);
}
}
return goodCount;
}
static inline void handleRequest(const SearchResultList& request, SearchResultList* response) {
std::vector<ScoredResult> scoredResults;
for (auto& result: request.result()) {
double score = result.score();
if (result.snippet().find(" cat ") != std::string::npos) {
score *= 10000;
}
if (result.snippet().find(" dog ") != std::string::npos) {
score /= 10000;
}
scoredResults.emplace_back(score, &result);
}
std::sort(scoredResults.begin(), scoredResults.end());
for (auto& result: scoredResults) {
SearchResult* out = response->add_result();
out->set_score(result.score);
out->set_url(result.result->url());
out->set_snippet(result.result->snippet());
}
}
static inline bool checkResponse(const SearchResultList& response, int expectedGoodCount) {
int goodCount = 0;
for (auto& result: response.result()) {
if (result.score() > 1001) {
++goodCount;
} else {
break;
}
}
return goodCount == expectedGoodCount;
}
};
// ======================================================================================= // =======================================================================================
struct SingleUseMessages { struct SingleUseMessages {
...@@ -293,8 +399,8 @@ void readAll(int fd, void* buffer, size_t size) { ...@@ -293,8 +399,8 @@ void readAll(int fd, void* buffer, size_t size) {
} }
} }
static char scratch[128 << 10]; static char scratch[1 << 20];
static char scratch2[128 << 10]; static char scratch2[1 << 20];
struct SnappyCompressed { struct SnappyCompressed {
typedef int InputStream; typedef int InputStream;
...@@ -546,62 +652,76 @@ uint64_t passByPipe(Func&& clientFunc, uint64_t iters) { ...@@ -546,62 +652,76 @@ uint64_t passByPipe(Func&& clientFunc, uint64_t iters) {
} }
} }
template <typename ReuseStrategy, typename Compression> template <typename TestCase, typename ReuseStrategy, typename Compression>
uint64_t doBenchmark(const std::string& mode, uint64_t iters) { uint64_t doBenchmark(const std::string& mode, uint64_t iters) {
if (mode == "client") { if (mode == "client") {
return syncClient<ExpressionTestCase, ReuseStrategy, Compression>( return syncClient<TestCase, ReuseStrategy, Compression>(
STDIN_FILENO, STDOUT_FILENO, iters); STDIN_FILENO, STDOUT_FILENO, iters);
} else if (mode == "server") { } else if (mode == "server") {
return server<ExpressionTestCase, ReuseStrategy, Compression>( return server<TestCase, ReuseStrategy, Compression>(
STDIN_FILENO, STDOUT_FILENO, iters); STDIN_FILENO, STDOUT_FILENO, iters);
} else if (mode == "object") { } else if (mode == "object") {
return passByObject<ExpressionTestCase, ReuseStrategy, Compression>(iters); return passByObject<TestCase, ReuseStrategy, Compression>(iters);
} else if (mode == "bytes") { } else if (mode == "bytes") {
return passByBytes<ExpressionTestCase, ReuseStrategy, Compression>(iters); return passByBytes<TestCase, ReuseStrategy, Compression>(iters);
} else if (mode == "pipe") { } else if (mode == "pipe") {
return passByPipe<ExpressionTestCase, ReuseStrategy, Compression>( return passByPipe<TestCase, ReuseStrategy, Compression>(
syncClient<ExpressionTestCase, ReuseStrategy, Compression>, iters); syncClient<TestCase, ReuseStrategy, Compression>, iters);
} else if (mode == "pipe-async") { } else if (mode == "pipe-async") {
return passByPipe<ExpressionTestCase, ReuseStrategy, Compression>( return passByPipe<TestCase, ReuseStrategy, Compression>(
asyncClient<ExpressionTestCase, ReuseStrategy, Compression>, iters); asyncClient<TestCase, ReuseStrategy, Compression>, iters);
} else { } else {
std::cerr << "Unknown mode: " << mode << std::endl; std::cerr << "Unknown mode: " << mode << std::endl;
exit(1); exit(1);
} }
} }
template <typename Compression> template <typename TestCase, typename Compression>
uint64_t doBenchmark2(const std::string& mode, const std::string& reuse, uint64_t iters) { uint64_t doBenchmark2(const std::string& mode, const std::string& reuse, uint64_t iters) {
if (reuse == "reuse") { if (reuse == "reuse") {
return doBenchmark<ReusableMessages, Compression>(mode, iters); return doBenchmark<TestCase, ReusableMessages, Compression>(mode, iters);
} else if (reuse == "no-reuse") { } else if (reuse == "no-reuse") {
return doBenchmark<SingleUseMessages, Compression>(mode, iters); return doBenchmark<TestCase, SingleUseMessages, Compression>(mode, iters);
} else { } else {
std::cerr << "Unknown reuse mode: " << reuse << std::endl; std::cerr << "Unknown reuse mode: " << reuse << std::endl;
exit(1); exit(1);
} }
} }
// Outermost dispatch layer: picks the compression policy named by `compression` ("none" or
// "snappy") and forwards the remaining options to doBenchmark2. Any other name prints an error
// to stderr and terminates the process with exit status 1.
template <typename TestCase>
uint64_t doBenchmark3(const std::string& mode, const std::string& reuse,
                      const std::string& compression, uint64_t iters) {
  if (compression == "none") {
    return doBenchmark2<TestCase, Uncompressed>(mode, reuse, iters);
  }

  if (compression == "snappy") {
    return doBenchmark2<TestCase, SnappyCompressed>(mode, reuse, iters);
  }

  // Unrecognized compression name: nothing sensible to run.
  std::cerr << "Unknown compression mode: " << compression << std::endl;
  exit(1);
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (argc != 5) { if (argc != 6) {
std::cerr << "USAGE: " << argv[0] << " MODE REUSE COMPRESSION ITERATION_COUNT" << std::endl; std::cerr << "USAGE: " << argv[0]
<< " TEST_CASE MODE REUSE COMPRESSION ITERATION_COUNT" << std::endl;
return 1; return 1;
} }
uint64_t iters = strtoull(argv[4], nullptr, 0); uint64_t iters = strtoull(argv[5], nullptr, 0);
srand(123); srand(123);
std::cerr << "Doing " << iters << " iterations..." << std::endl; std::cerr << "Doing " << iters << " iterations..." << std::endl;
uint64_t throughput; uint64_t throughput;
std::string compression = argv[3]; std::string testcase = argv[1];
if (compression == "none") { if (testcase == "eval") {
throughput = doBenchmark2<Uncompressed>(argv[1], argv[2], iters); throughput = doBenchmark3<ExpressionTestCase>(argv[2], argv[3], argv[4], iters);
} else if (compression == "snappy") { } else if (testcase == "catrank") {
throughput = doBenchmark2<SnappyCompressed>(argv[1], argv[2], iters); throughput = doBenchmark3<CatRankTestCase>(argv[2], argv[3], argv[4], iters);
} else { } else {
std::cerr << "Unknown compression mode: " << compression << std::endl; std::cerr << "Unknown test case: " << testcase << std::endl;
return 1; return 1;
} }
......
...@@ -40,3 +40,15 @@ struct Expression { ...@@ -40,3 +40,15 @@ struct Expression {
struct EvaluationResult { struct EvaluationResult {
value@0: Int32; value@0: Int32;
} }
# ========================================================================================
# Request and response message of the CatRank benchmark: a list of search results.
# The benchmark sends the candidates in this message and receives them back re-scored and sorted.
struct SearchResultList {
# The search results, in list order.
results@0: List(SearchResult);
}
# One candidate search result handled by the CatRank benchmark.
struct SearchResult {
# Address of the result.
url@0: Text;
# Ranking score; the CatRank handler rewrites it and sorts results by it in descending order.
score@1: Float64;
# Text excerpt; the CatRank handler searches it for " cat " and " dog ".
snippet@2: Text;
}
...@@ -42,3 +42,15 @@ message Expression { ...@@ -42,3 +42,15 @@ message Expression {
message EvaluationResult { message EvaluationResult {
required sint32 value = 1; required sint32 value = 1;
} }
// =======================================================================================
// Request and response message of the CatRank benchmark: a list of search results.
// The benchmark sends the candidates in this message and receives them back re-scored and sorted.
message SearchResultList {
// The search results, in list order.
repeated SearchResult result = 1;
}
// One candidate search result handled by the CatRank benchmark.
message SearchResult {
// Address of the result.
optional string url = 1;
// Ranking score; the CatRank handler rewrites it and sorts results by it in descending order.
optional double score = 2;
// Text excerpt; the CatRank handler searches it for " cat " and " dog ".
optional string snippet = 3;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment