Commit c703061d authored by Yilun Chong's avatar Yilun Chong

Add gogo benchmark

parent ed4321d1
This diff is collapsed.
package main package main
import ( import (
benchmarkWrapper "./tmp" benchmarkWrapper "../tmp"
googleMessage1Proto2 "./tmp/datasets/google_message1/proto2" googleMessage1Proto2 "../tmp/datasets/google_message1/proto2"
googleMessage1Proto3 "./tmp/datasets/google_message1/proto3" googleMessage1Proto3 "../tmp/datasets/google_message1/proto3"
googleMessage2 "./tmp/datasets/google_message2" googleMessage2 "../tmp/datasets/google_message2"
googleMessage3 "./tmp/datasets/google_message3" googleMessage3 "../tmp/datasets/google_message3"
googleMessage4 "./tmp/datasets/google_message4" googleMessage4 "../tmp/datasets/google_message4"
"flag" "flag"
"github.com/golang/protobuf/proto" "github.com/golang/protobuf/proto"
"io/ioutil" "io/ioutil"
......
...@@ -2,21 +2,35 @@ import sys ...@@ -2,21 +2,35 @@ import sys
import os import os
import timeit import timeit
import math import math
import argparse
import fnmatch import fnmatch
import json
parser = argparse.ArgumentParser(description="Python protobuf benchmark")
parser.add_argument("data_files", metavar="dataFile", nargs="+",
help="testing data files.")
parser.add_argument("--json", action="store_const", dest="json",
const="yes", default="no",
help="Whether to output json results")
parser.add_argument("--behavior_prefix", dest="behavior_prefix",
help="The output json format's behavior's name's prefix",
default="")
# BEGIN CPP GENERATED MESSAGE
parser.add_argument("--cpp_generated", action="store_const",
dest="cpp_generated", const="yes", default="no",
help="Whether to link generated code library")
# END CPP GENERATED MESSAGE
args = parser.parse_args()
# BEGIN CPP GENERATED MESSAGE # BEGIN CPP GENERATED MESSAGE
# CPP generated code must be linked before importing the generated Python code # CPP generated code must be linked before importing the generated Python code
# for the descriptor can be found in the pool # for the descriptor can be found in the pool
if len(sys.argv) < 2: if args.cpp_generated != "no":
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
if sys.argv[1] == "true":
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" ) sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" )
import libbenchmark_messages import libbenchmark_messages
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" ) sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" )
elif sys.argv[1] != "false":
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
# END CPP GENERATED MESSAGE # END CPP GENERATED MESSAGE
import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2 import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2 import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2 import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
...@@ -26,19 +40,24 @@ import benchmarks_pb2 as benchmarks_pb2 ...@@ -26,19 +40,24 @@ import benchmarks_pb2 as benchmarks_pb2
def run_one_test(filename): def run_one_test(filename):
data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read() data = open(filename).read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset() benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data) benchmark_dataset.ParseFromString(data)
benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload), benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
module="py_benchmark", module="py_benchmark",
setup_method="init") setup_method="init")
print "Message %s of dataset file %s" % \ result={}
(benchmark_dataset.message_name, filename) result["filename"] = filename
result["message_name"] = benchmark_dataset.message_name
result["benchmarks"] = {}
benchmark_util.set_test_method("parse_from_benchmark") benchmark_util.set_test_method("parse_from_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) result["benchmarks"][args.behavior_prefix + "_parse_from_benchmark"] = \
benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
benchmark_util.set_test_method("serialize_to_benchmark") benchmark_util.set_test_method("serialize_to_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename)) result["benchmarks"][args.behavior_prefix + "_serialize_to_benchmark"] = \
print "" benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
return result
def init(filename): def init(filename):
global benchmark_dataset, message_class, message_list, counter global benchmark_dataset, message_class, message_list, counter
...@@ -66,11 +85,13 @@ def init(filename): ...@@ -66,11 +85,13 @@ def init(filename):
temp.ParseFromString(one_payload) temp.ParseFromString(one_payload)
message_list.append(temp) message_list.append(temp)
def parse_from_benchmark(): def parse_from_benchmark():
global counter, message_class, benchmark_dataset global counter, message_class, benchmark_dataset
m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)]) m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
counter = counter + 1 counter = counter + 1
def serialize_to_benchmark(): def serialize_to_benchmark():
global counter, message_list, message_class global counter, message_list, message_class
s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString() s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
...@@ -108,11 +129,22 @@ class Benchmark: ...@@ -108,11 +129,22 @@ class Benchmark:
t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args), t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args), setup=self.full_setup_code(setup_method_args),
number=reps); number=reps);
return "Average time for %s: %.2f ns" % \ return 1.0 * t / reps * (10 ** 9)
(self.test_method, 1.0 * t / reps * (10 ** 9))
if __name__ == "__main__": if __name__ == "__main__":
for i in range(2, len(sys.argv)): results = []
run_one_test(sys.argv[i]) for file in args.data_files:
results.append(run_one_test(file))
# Emit the collected results: one JSON document when --json was given,
# otherwise a human-readable summary per dataset file.
if args.json != "no":
    print(json.dumps(results))
else:
    for result in results:
        print("Message %s of dataset file %s" %
              (result["message_name"], result["filename"]))
        for method in ("parse_from_benchmark", "serialize_to_benchmark"):
            # run_one_test() stores each timing under
            # args.behavior_prefix + "_" + <method>, so the lookup must use
            # the same key; the bare method name raises KeyError.
            print("Average time for %s: %.2f ns" %
                  (method,
                   result["benchmarks"][args.behavior_prefix + "_" + method]))
        print("")
#include <Python.h> #include <Python.h>
#include "benchmarks.pb.h" #include "benchmarks.pb.h"
#include "datasets/google_message1/benchmark_message1_proto2.pb.h" #include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/benchmark_message1_proto3.pb.h" #include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h" #include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h" #include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h" #include "datasets/google_message4/benchmark_message4.pb.h"
......
#include "benchmarks.pb.h"
#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
using google::protobuf::FieldDescriptor;
using google::protobuf::Message;
using google::protobuf::Reflection;
class DataGroupStripper {
public:
static void StripMessage(Message *message) {
std::vector<const FieldDescriptor*> set_fields;
const Reflection* reflection = message->GetReflection();
reflection->ListFields(*message, &set_fields);
for (size_t i = 0; i < set_fields.size(); i++) {
const FieldDescriptor* field = set_fields[i];
if (field->type() == FieldDescriptor::TYPE_GROUP) {
reflection->ClearField(message, field);
}
if (field->type() == FieldDescriptor::TYPE_MESSAGE) {
if (field->is_repeated()) {
for (int j = 0; j < reflection->FieldSize(*message, field); j++) {
StripMessage(reflection->MutableRepeatedMessage(message, field, j));
}
} else {
StripMessage(reflection->MutableMessage(message, field));
}
}
}
reflection->MutableUnknownFields(message)->Clear();
}
};
// Returns the full contents of the file `name`.
//
// The stream is opened in binary mode because the callers feed the result to
// a protobuf parser; a text-mode stream may translate line endings on some
// platforms and corrupt the serialized bytes.
//
// Aborts via GOOGLE_CHECK when the file cannot be opened.
std::string ReadFile(const std::string& name) {
  std::ifstream file(name.c_str(), std::ios::in | std::ios::binary);
  GOOGLE_CHECK(file.is_open()) << "Couldn't find file '"
      << name
      << "', please make sure you are running this command from the benchmarks"
      << " directory.\n";
  return std::string((std::istreambuf_iterator<char>(file)),
                     std::istreambuf_iterator<char>());
}
int main(int argc, char *argv[]) {
if (argc % 2 == 0 || argc == 1) {
std::cerr << "Usage: [input_files] [output_file_names] where " <<
"input_files are one to one mapping to output_file_names." <<
std::endl;
return 1;
}
for (int i = argc / 2; i > 0; i--) {
const std::string &input_file = argv[i];
const std::string &output_file = argv[i + argc / 2];
std::cerr << "Generating " << input_file
<< " to " << output_file << std::endl;
benchmarks::BenchmarkDataset dataset;
Message* message;
std::string dataset_payload = ReadFile(input_file);
GOOGLE_CHECK(dataset.ParseFromString(dataset_payload))
<< "Can' t parse data file " << input_file;
if (dataset.message_name() == "benchmarks.proto3.GoogleMessage1") {
message = new benchmarks::proto3::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage1") {
message = new benchmarks::proto2::GoogleMessage1;
} else if (dataset.message_name() == "benchmarks.proto2.GoogleMessage2") {
message = new benchmarks::proto2::GoogleMessage2;
} else if (dataset.message_name() ==
"benchmarks.google_message3.GoogleMessage3") {
message = new benchmarks::google_message3::GoogleMessage3;
} else if (dataset.message_name() ==
"benchmarks.google_message4.GoogleMessage4") {
message = new benchmarks::google_message4::GoogleMessage4;
} else {
std::cerr << "Unknown message type: " << dataset.message_name();
exit(1);
}
for (int i = 0; i < dataset.payload_size(); i++) {
message->ParseFromString(dataset.payload(i));
DataGroupStripper::StripMessage(message);
dataset.set_payload(i, message->SerializeAsString());
}
std::ofstream ofs(output_file);
ofs << dataset.SerializeAsString();
ofs.close();
}
return 0;
}
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include "schema_proto2_to_proto3_util.h"
#include "google/protobuf/compiler/plugin.h"
using google::protobuf::FileDescriptorProto;
using google::protobuf::FileDescriptor;
using google::protobuf::DescriptorPool;
using google::protobuf::io::Printer;
using google::protobuf::util::SchemaGroupStripper;
using google::protobuf::util::SchemaAddZeroEnumValue;
namespace google {
namespace protobuf {
namespace compiler {
namespace {
// Returns `filename` without its trailing ".protodevel" or ".proto" suffix.
//
// A name that ends in neither suffix is returned unchanged. The suffix is
// only compared after checking that the name is long enough, so short names
// such as "a.proto" no longer make the unsigned `size() - 11` wrap around
// and throw std::out_of_range from substr().
std::string StripProto(std::string filename) {
  // ".protodevel" is tested first, matching the original precedence.
  const std::string suffixes[] = {".protodevel", ".proto"};
  for (const std::string& suffix : suffixes) {
    if (filename.size() >= suffix.size() &&
        filename.compare(filename.size() - suffix.size(), suffix.size(),
                         suffix) == 0) {
      return filename.substr(0, filename.size() - suffix.size());
    }
  }
  return filename;
}
DescriptorPool new_pool_;
} // namespace
class GoGoProtoGenerator : public CodeGenerator {
public:
virtual bool GenerateAll(const std::vector<const FileDescriptor*>& files,
const string& parameter,
GeneratorContext* context,
string* error) const {
for (int i = 0; i < files.size(); i++) {
for (auto file : files) {
bool can_generate =
(new_pool_.FindFileByName(file->name()) == nullptr);
for (int j = 0; j < file->dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->public_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->public_dependency(j)->name()) != nullptr);
}
for (int j = 0; j < file->weak_dependency_count(); j++) {
can_generate &= (new_pool_.FindFileByName(
file->weak_dependency(j)->name()) != nullptr);
}
if (can_generate) {
Generate(file, parameter, context, error);
break;
}
}
}
return true;
}
virtual bool Generate(const FileDescriptor* file,
const string& parameter,
GeneratorContext* context,
string* error) const {
FileDescriptorProto new_file;
file->CopyTo(&new_file);
SchemaGroupStripper::StripFile(file, &new_file);
SchemaAddZeroEnumValue enum_scrubber;
enum_scrubber.ScrubFile(&new_file);
string filename = file->name();
string basename = StripProto(filename);
std::vector<std::pair<string,string>> option_pairs;
ParseGeneratorParameter(parameter, &option_pairs);
std::unique_ptr<google::protobuf::io::ZeroCopyOutputStream> output(
context->Open(basename + ".proto"));
string content = new_pool_.BuildFile(new_file)->DebugString();
Printer printer(output.get(), '$');
printer.WriteRaw(content.c_str(), content.size());
return true;
}
};
} // namespace compiler
} // namespace protobuf
} // namespace google
int main(int argc, char* argv[]) {
google::protobuf::compiler::GoGoProtoGenerator generator;
return google::protobuf::compiler::PluginMain(argc, argv, &generator);
}
#ifndef PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
#define PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
#include "google/protobuf/message.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/descriptor.pb.h"
#include <sstream>
#include <algorithm>
using google::protobuf::Descriptor;
using google::protobuf::DescriptorProto;
using google::protobuf::FileDescriptorProto;
using google::protobuf::FieldDescriptorProto;
using google::protobuf::Message;
using google::protobuf::EnumValueDescriptorProto;
namespace google {
namespace protobuf {
namespace util {
class SchemaGroupStripper {
public:
static void StripFile(const FileDescriptor* old_file,
FileDescriptorProto *file) {
for (int i = file->mutable_message_type()->size() - 1; i >= 0; i--) {
if (IsMessageSet(old_file->message_type(i))) {
file->mutable_message_type()->DeleteSubrange(i, 1);
continue;
}
StripMessage(old_file->message_type(i), file->mutable_message_type(i));
}
for (int i = file->mutable_extension()->size() - 1; i >= 0; i--) {
auto field = old_file->extension(i);
if (field->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(field->message_type()) ||
IsMessageSet(field->containing_type())) {
file->mutable_extension()->DeleteSubrange(i, 1);
}
}
}
private:
static bool IsMessageSet(const Descriptor *descriptor) {
if (descriptor != nullptr
&& descriptor->options().message_set_wire_format()) {
return true;
}
return false;
}
static void StripMessage(const Descriptor *old_message,
DescriptorProto *new_message) {
for (int i = new_message->mutable_field()->size() - 1; i >= 0; i--) {
if (old_message->field(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->field(i)->message_type())) {
new_message->mutable_field()->DeleteSubrange(i, 1);
}
}
for (int i = new_message->mutable_extension()->size() - 1; i >= 0; i--) {
auto field_type_name = new_message->mutable_extension(i)->type_name();
if (old_message->extension(i)->type() == FieldDescriptor::TYPE_GROUP ||
IsMessageSet(old_message->extension(i)->containing_type()) ||
IsMessageSet(old_message->extension(i)->message_type())) {
new_message->mutable_extension()->DeleteSubrange(i, 1);
}
}
for (int i = 0; i < new_message->mutable_nested_type()->size(); i++) {
StripMessage(old_message->nested_type(i),
new_message->mutable_nested_type(i));
}
}
};
class SchemaAddZeroEnumValue {
public:
SchemaAddZeroEnumValue()
: total_added_(0) {
}
void ScrubFile(FileDescriptorProto *file) {
for (int i = 0; i < file->enum_type_size(); i++) {
ScrubEnum(file->mutable_enum_type(i));
}
for (int i = 0; i < file->mutable_message_type()->size(); i++) {
ScrubMessage(file->mutable_message_type(i));
}
}
private:
void ScrubEnum(EnumDescriptorProto *enum_type) {
if (enum_type->value(0).number() != 0) {
bool has_zero = false;
for (int j = 0; j < enum_type->value().size(); j++) {
if (enum_type->value(j).number() == 0) {
EnumValueDescriptorProto temp_enum_value;
temp_enum_value.CopyFrom(enum_type->value(j));
enum_type->mutable_value(j)->CopyFrom(enum_type->value(0));
enum_type->mutable_value(0)->CopyFrom(temp_enum_value);
has_zero = true;
break;
}
}
if (!has_zero) {
enum_type->mutable_value()->Add();
for (int i = enum_type->mutable_value()->size() - 1; i > 0; i--) {
enum_type->mutable_value(i)->CopyFrom(
*enum_type->mutable_value(i - 1));
}
enum_type->mutable_value(0)->set_number(0);
enum_type->mutable_value(0)->set_name("ADDED_ZERO_VALUE_" +
std::to_string(total_added_++));
}
}
}
void ScrubMessage(DescriptorProto *message_type) {
for (int i = 0; i < message_type->mutable_enum_type()->size(); i++) {
ScrubEnum(message_type->mutable_enum_type(i));
}
for (int i = 0; i < message_type->mutable_nested_type()->size(); i++) {
ScrubMessage(message_type->mutable_nested_type(i));
}
}
int total_added_;
};
} // namespace util
} // namespace protobuf
} // namespace google
#endif // PROTOBUF_BENCHMARKS_UTIL_SCHEMA_PROTO2_TO_PROTO3_UTIL_H_
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment