Commit 2e831102 authored by Josh Haberman's avatar Josh Haberman

Added framework for generating/consuming benchmarking data sets.

This takes the code that was sitting in benchmarks/
already and makes it easier for language-specific
benchmarks to consume.  Future PRs will enhance this
so that the language-specific benchmarks can report
metrics back that will be tracked over time in PerfKit.
parent f53f9117
......@@ -9,7 +9,7 @@ AUTOMAKE_OPTIONS = foreign
SUBDIRS = . src
# Always include gmock in distributions.
DIST_SUBDIRS = $(subdirs) src conformance
DIST_SUBDIRS = $(subdirs) src conformance benchmarks
# Build gmock before we build protobuf tests. We don't add gmock to SUBDIRS
# because then "make check" would also build and run all of gmock's own tests,
......@@ -36,6 +36,10 @@ clean-local:
echo "Making clean in conformance"; \
cd conformance && $(MAKE) $(AM_MAKEFLAGS) clean; \
fi; \
if test -e benchmarks/Makefile; then \
echo "Making clean in benchmarks"; \
cd benchmarks && $(MAKE) $(AM_MAKEFLAGS) clean; \
fi; \
if test -e objectivec/DevTools; then \
echo "Cleaning any ObjC pyc files"; \
rm -f objectivec/DevTools/*.pyc; \
......
benchmarks_protoc_inputs = \
benchmarks.proto \
benchmark_messages_proto3.proto
benchmarks_protoc_inputs_proto2 = \
benchmark_messages_proto2.proto
benchmarks_protoc_outputs = \
benchmarks.pb.cc \
benchmarks.pb.h \
benchmark_messages_proto3.pb.cc \
benchmark_messages_proto3.pb.h
benchmarks_protoc_outputs_proto2 = \
benchmark_messages_proto2.pb.cc \
benchmark_messages_proto2.pb.h
bin_PROGRAMS = generate-datasets
generate_datasets_LDADD = $(top_srcdir)/src/libprotobuf.la
generate_datasets_SOURCES = generate_datasets.cc
generate_datasets_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir)
nodist_generate_datasets_SOURCES = \
google_message1.h \
google_message2.h \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2)
# Explicit deps because BUILT_SOURCES are only done before a "make all/check"
# so a direct "make generate-datasets" could fail if parallel enough.
generate_datasets-generate_datasets.$(OBJEXT): benchmarks.pb.h google_message1.h google_message2.h
$(benchmarks_protoc_outputs): protoc_middleman
$(benchmarks_protoc_outputs_proto2): protoc_middleman2
google_message1.h: google_message1.dat
xxd -i $< $@
google_message2.h: google_message2.dat
xxd -i $< $@
CLEANFILES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2) \
google_message1.h \
google_message2.h \
protoc_middleman \
protoc_middleman2 \
dataset.*
if USE_EXTERNAL_PROTOC
protoc_middleman: $(benchmarks_protoc_inputs)
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs)
touch protoc_middleman
protoc_middleman2: $(benchmarks_protoc_inputs_proto2)
$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. $(benchmarks_protoc_inputs_proto2)
touch protoc_middleman2
else
# We have to cd to $(srcdir) before executing protoc because
# $(benchmarks_protoc_inputs) and $(benchmarks_protoc_inputs_proto2) are
# relative to srcdir, which may not be the same as the current directory when
# building out-of-tree.
protoc_middleman: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs) )
touch protoc_middleman
# Generates the C++ sources for the proto2 benchmark messages with the in-tree
# protoc. The stamp file touched at the end must be protoc_middleman2 (this
# rule's own target); touching protoc_middleman instead would leave this target
# permanently missing, so the rule would rerun on every build.
protoc_middleman2: $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd $(benchmarks_protoc_inputs_proto2) )
	touch protoc_middleman2
endif
// Benchmark messages for proto2.
syntax = "proto2";
package benchmarks;
package benchmarks.p2;
option java_package = "com.google.protobuf.benchmarks";
option java_outer_classname = "GoogleSpeed";
// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
message SpeedMessage1 {
message GoogleMessage1 {
required string field1 = 1;
optional string field9 = 9;
optional string field18 = 18;
......@@ -40,7 +43,7 @@ message SpeedMessage1 {
optional int32 field23 = 23 [default=0];
optional bool field24 = 24 [default=false];
optional int32 field25 = 25 [default=0];
optional SpeedMessage1SubMessage field15 = 15;
optional GoogleMessage1SubMessage field15 = 15;
optional bool field78 = 78;
optional int32 field67 = 67 [default=0];
optional int32 field68 = 68;
......@@ -49,7 +52,7 @@ message SpeedMessage1 {
optional int32 field131 = 131 [default=0];
}
message SpeedMessage1SubMessage {
message GoogleMessage1SubMessage {
optional int32 field1 = 1 [default=0];
optional int32 field2 = 2 [default=0];
optional int32 field3 = 3 [default=0];
......@@ -72,7 +75,7 @@ message SpeedMessage1SubMessage {
optional uint64 field300 = 300;
}
message SpeedMessage2 {
message GoogleMessage2 {
optional string field1 = 1;
optional int64 field3 = 3;
optional int64 field4 = 4;
......@@ -112,7 +115,7 @@ message SpeedMessage2 {
repeated int32 field73 = 73;
optional int32 field20 = 20 [default=0];
optional string field24 = 24;
optional SpeedMessage2GroupedMessage field31 = 31;
optional GoogleMessage2GroupedMessage field31 = 31;
}
repeated string field128 = 128;
optional int64 field131 = 131;
......@@ -123,7 +126,7 @@ message SpeedMessage2 {
optional bool field206 = 206 [default=false];
}
message SpeedMessage2GroupedMessage {
message GoogleMessage2GroupedMessage {
optional float field1 = 1;
optional float field2 = 2;
optional float field3 = 3 [default=0.0];
......
// Benchmark messages for proto3.
syntax = "proto3";
package benchmarks.p3;
option java_package = "com.google.protobuf.benchmarks";
// This is the default, but we specify it here explicitly.
option optimize_for = SPEED;
// proto3 variant of GoogleMessage1. generate_datasets.cc parses the
// google_message1 payload both as this type (benchmarks.p3.GoogleMessage1)
// and as the proto2 message of the same name (benchmarks.p2.GoogleMessage1).
message GoogleMessage1 {
string field1 = 1;
string field9 = 9;
string field18 = 18;
bool field80 = 80;
bool field81 = 81;
int32 field2 = 2;
int32 field3 = 3;
int32 field280 = 280;
int32 field6 = 6;
int64 field22 = 22;
string field4 = 4;
repeated fixed64 field5 = 5;
bool field59 = 59;
string field7 = 7;
int32 field16 = 16;
int32 field130 = 130;
bool field12 = 12;
bool field17 = 17;
bool field13 = 13;
bool field14 = 14;
int32 field104 = 104;
int32 field100 = 100;
int32 field101 = 101;
string field102 = 102;
string field103 = 103;
int32 field29 = 29;
bool field30 = 30;
int32 field60 = 60;
int32 field271 = 271;
int32 field272 = 272;
int32 field150 = 150;
int32 field23 = 23;
bool field24 = 24;
int32 field25 = 25;
// The only message-typed field; its type is declared right after this message.
GoogleMessage1SubMessage field15 = 15;
bool field78 = 78;
int32 field67 = 67;
int32 field68 = 68;
int32 field128 = 128;
string field129 = 129;
int32 field131 = 131;
}
// Sub-message type used by GoogleMessage1.field15 (proto3 variant).
message GoogleMessage1SubMessage {
int32 field1 = 1;
int32 field2 = 2;
int32 field3 = 3;
string field15 = 15;
bool field12 = 12;
int64 field13 = 13;
int64 field14 = 14;
int32 field16 = 16;
int32 field19 = 19;
bool field20 = 20;
bool field28 = 28;
fixed64 field21 = 21;
int32 field22 = 22;
bool field23 = 23;
bool field206 = 206;
fixed32 field203 = 203;
int32 field204 = 204;
string field205 = 205;
uint64 field207 = 207;
uint64 field300 = 300;
}
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
syntax = "proto3";
package benchmarks;
option java_package = "com.google.protobuf.benchmarks";
// One benchmark input: a named set of serialized payloads together with the
// message type they should be parsed as.
message BenchmarkDataset {
// Name of the benchmark dataset. This should be unique across all datasets.
// Should only contain word characters: [a-zA-Z0-9_]
string name = 1;
// Fully-qualified name of the protobuf message for this dataset.
// It will be one of the messages defined in benchmark_messages_proto2.proto
// or benchmark_messages_proto3.proto.
// Implementations that do not support reflection can implement this with
// an explicit "if/else" chain that lists every possible message defined
// in those files.
string message_name = 2;
// The payload(s) for this dataset. They should be parsed or serialized
// in sequence, in a loop, i.e.
//
//   while (!benchmarkDone) {  // Benchmark runner decides when to exit.
//     for (i = 0; i < benchmark.payload.length; i++) {
//       parse(benchmark.payload[i])
//     }
//   }
//
// This is intended to let datasets include a variety of data to provide
// potentially more realistic results than just parsing the same message
// over and over. A single message parsed repeatedly could yield unusually
// good branch prediction performance.
repeated bytes payload = 3;
}
// A benchmark can write out metrics that we will then upload to our metrics
// database for tracking over time.
message Metric {
// A unique ID for these results. Used for de-duping.
string guid = 1;
// The labels specify exactly what benchmark was run against the dataset.
// The specific benchmark suite can decide what these mean, but here are
// some common labels that have a predefined meaning:
//
//   - "dataset": for tests that pertain to a specific dataset.
//
// For example:
//
//   # Tests parsing from binary proto string using arenas.
//   labels={
//     dataset: "testalltypes",
//     op: "parse",
//     format: "binaryproto",
//     input: "string",
//     arena: "true"
//   }
//
//   # Tests serializing to JSON string.
//   labels={
//     dataset: "testalltypes",
//     op: "serialize",
//     format: "json",
//     input: "string"
//   }
map<string, string> labels = 2;
// Unit of measurement for the metric:
//   - a speed test might be "mb_per_second" or "ops_per_second"
//   - a size test might be "kb".
string unit = 3;
// Metric value.
double value = 4;
}
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Every dataset is written to a file named
// file_prefix + <dataset name> + file_suffix (see WriteFileWithPayloads).
const char *file_prefix = "dataset.";
const char *file_suffix = ".pb";
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <string>
#include <vector>

#include "benchmarks.pb.h"
#include "google_message1.h"
#include "google_message2.h"
using benchmarks::BenchmarkDataset;
using google::protobuf::Descriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::Message;
using google::protobuf::MessageFactory;
// Copies the full contents of a byte array into a std::string. `arr` must be
// a real array (not a pointer) so that sizeof(arr) covers all of it, and the
// element type must be one byte wide for sizeof(arr) to equal the element
// count.
#define ARRAY_TO_STRING(arr) std::string(arr, arr + sizeof(arr))
// Dataset names written so far; WriteFileWithPayloads uses this to reject
// duplicates.
std::set<std::string> names;
void WriteFileWithPayloads(const std::string& name,
const std::string& message_name,
const std::vector<std::string>& payload) {
if (!names.insert(name).second) {
std::cerr << "Duplicate test name: " << name << "\n";
abort();
}
// First verify that this message name exists in our set of benchmark messages
// and that these payloads are valid for the given message.
const Descriptor* d =
DescriptorPool::generated_pool()->FindMessageTypeByName(message_name);
if (!d) {
std::cerr << "For dataset " << name << ", no such message: "
<< message_name << "\n";
abort();
}
Message* m = MessageFactory::generated_factory()->GetPrototype(d)->New();
for (size_t i = 0; i < payload.size(); i++) {
if (!m->ParseFromString(payload[i])) {
std::cerr << "For dataset " << name << ", payload[" << i << "] fails "
<< "to parse\n";
abort();
}
}
BenchmarkDataset dataset;
dataset.set_name(name);
dataset.set_message_name(message_name);
for (size_t i = 0; i < payload.size(); i++) {
dataset.add_payload()->assign(payload[i]);
}
std::string serialized;
dataset.SerializeToString(&serialized);
std::ofstream writer;
std::string fname = file_prefix + name + file_suffix;
writer.open(fname);
writer << serialized;
writer.close();
std::cerr << "Wrote dataset: " << fname << "\n";
}
// Convenience wrapper around WriteFileWithPayloads for a dataset that
// consists of exactly one payload.
void WriteFile(const std::string& name, const std::string& message_name,
               const std::string& payload) {
  const std::vector<std::string> single_payload(1, payload);
  WriteFileWithPayloads(name, message_name, single_payload);
}
// Writes one dataset file per (payload, message type) combination listed
// below. The payload bytes come from the arrays declared in google_message1.h
// and google_message2.h.
int main() {
  const std::string message1_bytes = ARRAY_TO_STRING(google_message1_dat);
  const std::string message2_bytes = ARRAY_TO_STRING(google_message2_dat);

  // The same google_message1 payload is emitted once per syntax version.
  WriteFile("google_message1_proto3", "benchmarks.p3.GoogleMessage1",
            message1_bytes);
  WriteFile("google_message1_proto2", "benchmarks.p2.GoogleMessage1",
            message1_bytes);

  // Not in proto3 because it has a group, which is not supported.
  WriteFile("google_message2", "benchmarks.p2.GoogleMessage2",
            message2_bytes);

  return 0;
}
......@@ -180,5 +180,5 @@ export CFLAGS
export CXXFLAGS
AC_CONFIG_SUBDIRS([gmock])
AC_CONFIG_FILES([Makefile src/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
AC_CONFIG_FILES([Makefile src/Makefile benchmarks/Makefile conformance/Makefile protobuf.pc protobuf-lite.pc])
AC_OUTPUT
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment