Commit 07bcf21a authored by Joshua Haberman's avatar Joshua Haberman

Merge pull request #1464 from google/benchmarks

Added framework for generating/consuming benchmarking data sets.
parents 7dda3122 247ef1f0
......@@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign
SUBDIRS = . src
# Always include gmock in distributions.
DIST_SUBDIRS = $(subdirs) src conformance
DIST_SUBDIRS = $(subdirs) src conformance benchmarks
# Build gmock before we build protobuf tests. We don't add gmock to SUBDIRS
# because then "make check" would also build and run all of gmock's own tests,
......@@ -36,6 +36,10 @@ clean-local:
echo "Making clean in conformance"; \
cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \
fi; \
if test -e benchmarks/Makefile; then \
echo "Making clean in benchmarks"; \
cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \
fi; \
if test -e objectivec/DevTools; then \
echo "Cleaning any ObjC pyc files"; \
rm -f objectivec/DevTools/*.pyc; \
......
benchmarks_protoc_inputs = \
benchmarks.proto \
benchmark_messages_proto3.proto
benchmarks_protoc_inputs_proto2 = \
benchmark_messages_proto2.proto
benchmarks_protoc_outputs = \
benchmarks.pb.cc \
benchmarks.pb.h \
benchmark_messages_proto3.pb.cc \
benchmark_messages_proto3.pb.h
benchmarks_protoc_outputs_proto2 = \
benchmark_messages_proto2.pb.cc \
benchmark_messages_proto2.pb.h
bin_PROGRAMS = generate-datasets
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
generate_datasets_SOURCES = generate_datasets.cc
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
nodist_generate_datasets_SOURCES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2)
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
# so a direct "make test_cpp" could fail if parallel enough.
# See: https://www.gnu.org/software/automake/manual/html_node/Built-Sources-Example.html#Recording-Dependencies-manually
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h
$(benchmarks_protoc_outputs): protoc_middleman
$(benchmarks_protoc_outputs_proto2): protoc_middleman2
CLEANFILES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2) \
protoc_middleman \
protoc_middleman2 \
dataset.*
if USE_EXTERNAL_PROTOC
protoc_middleman: $(benchmarks_protoc_inputs)
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
touch protoc_middleman
protoc_middleman2: $(benchmarks_protoc_inputs_proto2)
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
touch protoc_middleman2
else
# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
# relative to srcdir, which may not be the same as the current directory when
# building out-of-tree.
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
touch protoc_middleman
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
touch protoc_middleman
endif
# Protocol Buffers Benchmarks
This directory contains benchmarking schemas and data sets that you
can use to test a variety of performance scenarios against your
protobuf language runtime.
The schema for the datasets is described in `benchmarks.proto`.
Generate the data sets like so:
```
$ make
$ ./generate-datasets
Wrote dataset: dataset.google_message1_proto3.pb
Wrote dataset: dataset.google_message1_proto2.pb
Wrote dataset: dataset.google_message2.pb
$
```
Each data set will be written to its own file. Benchmarks will
likely want to run several benchmarks against each data set (parse,
serialize, possibly JSON, possibly using different APIs, etc).
We would like to add more data sets. In general we will favor data sets
that make the overall suite diverse without being too large or having
too many similar tests. Ideally everyone can run through the entire
suite without the test run getting too long.
// Benchmark messages for proto2.
syntax = "proto2";
package benchmarks;
package benchmarks.proto2;
option java_package = "com.google.protobuf.benchmarks";
option java_outer_classname = "GoogleSpeed";
// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
message SpeedMessage1 {
message GoogleMessage1 {
required string field1 = 1;
optional string field9 = 9;
optional string field18 = 18;
......@@ -40,7 +43,7 @@ message SpeedMessage1 {
optional int32 field23 = 23 [default=0];
optional bool field24 = 24 [default=false];
optional int32 field25 = 25 [default=0];
optional SpeedMessage1SubMessage field15 = 15;
optional GoogleMessage1SubMessage field15 = 15;
optional bool field78 = 78;
optional int32 field67 = 67 [default=0];
optional int32 field68 = 68;
......@@ -49,7 +52,7 @@ message SpeedMessage1 {
optional int32 field131 = 131 [default=0];
}
message SpeedMessage1SubMessage {
message GoogleMessage1SubMessage {
optional int32 field1 = 1 [default=0];
optional int32 field2 = 2 [default=0];
optional int32 field3 = 3 [default=0];
......@@ -72,7 +75,7 @@ message SpeedMessage1SubMessage {
optional uint64 field300 = 300;
}
message SpeedMessage2 {
message GoogleMessage2 {
optional string field1 = 1;
optional int64 field3 = 3;
optional int64 field4 = 4;
......@@ -112,7 +115,7 @@ message SpeedMessage2 {
repeated int32 field73 = 73;
optional int32 field20 = 20 [default=0];
optional string field24 = 24;
optional SpeedMessage2GroupedMessage field31 = 31;
optional GoogleMessage2GroupedMessage field31 = 31;
}
repeated string field128 = 128;
optional int64 field131 = 131;
......@@ -123,7 +126,7 @@ message SpeedMessage2 {
optional bool field206 = 206 [default=false];
}
message SpeedMessage2GroupedMessage {
message GoogleMessage2GroupedMessage {
optional float field1 = 1;
optional float field2 = 2;
optional float field3 = 3 [default=0.0];
......
// Benchmark messages for proto3.
syntax = "proto3";
package benchmarks.proto3;
option java_package = "com.google.protobuf.benchmarks";
// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
message GoogleMessage1 {
string field1 = 1;
string field9 = 9;
string field18 = 18;
bool field80 = 80;
bool field81 = 81;
int32 field2 = 2;
int32 field3 = 3;
int32 field280 = 280;
int32 field6 = 6;
int64 field22 = 22;
string field4 = 4;
repeated fixed64 field5 = 5;
bool field59 = 59;
string field7 = 7;
int32 field16 = 16;
int32 field130 = 130;
bool field12 = 12;
bool field17 = 17;
bool field13 = 13;
bool field14 = 14;
int32 field104 = 104;
int32 field100 = 100;
int32 field101 = 101;
string field102 = 102;
string field103 = 103;
int32 field29 = 29;
bool field30 = 30;
int32 field60 = 60;
int32 field271 = 271;
int32 field272 = 272;
int32 field150 = 150;
int32 field23 = 23;
bool field24 = 24;
int32 field25 = 25;
GoogleMessage1SubMessage field15 = 15;
bool field78 = 78;
int32 field67 = 67;
int32 field68 = 68;
int32 field128 = 128;
string field129 = 129;
int32 field131 = 131;
}
message GoogleMessage1SubMessage {
int32 field1 = 1;
int32 field2 = 2;
int32 field3 = 3;
string field15 = 15;
bool field12 = 12;
int64 field13 = 13;
int64 field14 = 14;
int32 field16 = 16;
int32 field19 = 19;
bool field20 = 20;
bool field28 = 28;
fixed64 field21 = 21;
int32 field22 = 22;
bool field23 = 23;
bool field206 = 206;
fixed32 field203 = 203;
int32 field204 = 204;
string field205 = 205;
uint64 field207 = 207;
uint64 field300 = 300;
}
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package benchmarks;
option java_package = "com.google.protobuf.benchmarks";
message BenchmarkDataset {
// Name of the benchmark dataset. This should be unique across all datasets.
// Should only contain word characters: [a-zA-Z0-9_]
string name = 1;
// Fully-qualified name of the protobuf message for this dataset.
// It will be one of the messages defined benchmark_messages_proto2.proto
// or benchmark_messages_proto3.proto.
//
// Implementations that do not support reflection can implement this with
// an explicit "if/else" chain that lists every known message defined
// in those files.
string message_name = 2;
// The payload(s) for this dataset. They should be parsed or serialized
// in sequence, in a loop, ie.
//
// while (!benchmarkDone) { // Benchmark runner decides when to exit.
// for (i = 0; i < benchmark.payload.length; i++) {
// parse(benchmark.payload[i])
// }
// }
//
// This is intended to let datasets include a variety of data to provide
// potentially more realistic results than just parsing the same message
// over and over. A single message parsed repeatedly could yield unusually
// good branch prediction performance.
repeated bytes payload = 3;
}
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <fstream>
#include <iostream>
#include "benchmarks.pb.h"
using benchmarks::BenchmarkDataset;
using google::protobuf::Descriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::Message;
using google::protobuf::MessageFactory;
std::set<std::string> names;
const char *file_prefix = "dataset.";
const char *file_suffix = ".pb";
void WriteFileWithPayloads(const std::string& name,
const std::string& message_name,
const std::vector<std::string>& payload) {
if (!names.insert(name).second) {
std::cerr << "Duplicate test name: " << name << "\n";
abort();
}
// First verify that this message name exists in our set of benchmark messages
// and that these payloads are valid for the given message.
const Descriptor* d =
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name);
if (!d) {
std::cerr << "For dataset " << name << ", no such message: "
<< message_name << "\n";
abort();
}
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New();
for (size_t i = 0; i < payload.size(); i++) {
if (!m->ParseFromString(payload[i])) {
std::cerr << "For dataset " << name << ", payload[" << i << "] fails "
<< "to parse\n";
abort();
}
}
BenchmarkDataset dataset;
dataset.set_name(name);
dataset.set_message_name(message_name);
for (size_t i = 0; i < payload.size(); i++) {
dataset.add_payload()->assign(payload[i]);
}
std::ofstream writer;
std::string fname = file_prefix + name + file_suffix;
writer.open(fname.c_str());
dataset.SerializeToOstream(&writer);
writer.close();
std::cerr << "Wrote dataset: " << fname << "\n";
}
void WriteFile(const std::string& name, const std::string& message_name,
const std::string& payload) {
std::vector<std::string> payloads;
payloads.push_back(payload);
WriteFileWithPayloads(name, message_name, payloads);
}
std::string ReadFile(const std::string& name) {
std::ifstream file(name.c_str());
GOOGLE_CHECK(file.is_open()) << "Couldn't find file '" << name <<
"', please make sure you are running "
"this command from the benchmarks/ "
"directory.\n";
return std::string((std::istreambuf_iterator<char>(file)),
std::istreambuf_iterator<char>());
}
int main() {
WriteFile("google_message1_proto3", "benchmarks.proto3.GoogleMessage1",
ReadFile("google_message1.dat"));
WriteFile("google_message1_proto2", "benchmarks.proto2.GoogleMessage1",
ReadFile("google_message1.dat"));
// Not in proto3 because it has a group, which is not supported.
WriteFile("google_message2", "benchmarks.proto2.GoogleMessage2",
ReadFile("google_message2.dat"));
}
......@@ -180,5 +180,5 @@ export CFLAGS
export CXXFLAGS
AC_CONFIG_SUBDIRS([gmock])
AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
AC_OUTPUT
......@@ -36,6 +36,9 @@ build_cpp() {
internal_build_cpp
make check -j2
cd conformance && make test_cpp && cd ..
# Verify benchmarking code can build successfully.
cd benchmarks && make && ./generate-datasets && cd ..
}
build_cpp_distcheck() {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment