Merge pull request #4361 from BSBandme/go_benchmark

Add go benchmark

Merge pull request #4361 from BSBandme/go_benchmark
Add go benchmark
e436ee0a · Yilun Chong · GitHub · 88a4884b · d6323c8c · e436ee0a
Unverified Commit e436ee0a authored 6 years ago by Yilun Chong Committed by GitHub 6 years ago
9 changed files
--- a/benchmarks/Makefile.am
+++ b/benchmarks/Makefile.am
+benchmarks_protoc_inputs_benchmark_wrapper =                               \
+	benchmarks.proto
+
 benchmarks_protoc_inputs =                                                 \
-	benchmarks.proto                                                         \
-	datasets/google_message1/benchmark_message1_proto3.proto
+	datasets/google_message1/proto3/benchmark_message1_proto3.proto

 benchmarks_protoc_inputs_proto2 =                                          \
-	datasets/google_message1/benchmark_message1_proto2.proto                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.proto          \
 	datasets/google_message2/benchmark_message2.proto                        \
 	datasets/google_message3/benchmark_message3.proto                        \
 	datasets/google_message3/benchmark_message3_1.proto                      \
@@ -26,7 +28,7 @@ make_tmp_dir:
 if USE_EXTERNAL_PROTOC

 protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs)
-	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --cpp_out=. --java_out=./tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
 	touch protoc_middleman

 protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2)
@@ -38,8 +40,8 @@ else
 # We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
 # relative to srcdir, which may not be the same as the current directory when
 # building out-of-tree.
-protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
-	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
+protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper) )
 	touch protoc_middleman

 protoc_middleman2:  make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
@@ -54,14 +56,14 @@ all_data = `find . -type f -name "dataset.*.pb"`

 benchmarks_protoc_outputs =                                                \
 	benchmarks.pb.cc                                                         \
-	datasets/google_message1/benchmark_message1_proto3.pb.cc
+	datasets/google_message1/proto3/benchmark_message1_proto3.pb.cc
  
 benchmarks_protoc_outputs_header =                                         \
 	benchmarks.pb.h                                                          \
-	datasets/google_message1/benchmark_message1_proto3.pb.h
+	datasets/google_message1/proto3/benchmark_message1_proto3.pb.h

 benchmarks_protoc_outputs_proto2_header =                                  \
-	datasets/google_message1/benchmark_message1_proto2.pb.h                  \
+	datasets/google_message1/proto2/benchmark_message1_proto2.pb.h           \
 	datasets/google_message2/benchmark_message2.pb.h                         \
 	datasets/google_message3/benchmark_message3.pb.h                         \
 	datasets/google_message3/benchmark_message3_1.pb.h                       \
@@ -78,7 +80,7 @@ benchmarks_protoc_outputs_proto2_header =                                  \
 	datasets/google_message4/benchmark_message4_3.pb.h

 benchmarks_protoc_outputs_proto2 =                                         \
-	datasets/google_message1/benchmark_message1_proto2.pb.cc                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.pb.cc          \
 	datasets/google_message2/benchmark_message2.pb.cc                        \
 	datasets/google_message3/benchmark_message3.pb.cc                        \
 	datasets/google_message3/benchmark_message3_1.pb.cc                      \
@@ -224,6 +226,78 @@ python-cpp-generated-code: python-cpp-generated-code-benchmark

 ############# PYTHON RULES END ##############

+############# GO RULES BEGIN ##############
+
+# Per-dataset proto2 input lists for the Go rules. go_protoc_middleman2 hands
+# each of these lists to its own protoc --go_out invocation instead of
+# compiling all proto2 inputs in a single call.
+benchmarks_protoc_inputs_proto2_message1 =                                 \
+	datasets/google_message1/proto2/benchmark_message1_proto2.proto
+
+benchmarks_protoc_inputs_proto2_message2 =                                 \
+	datasets/google_message2/benchmark_message2.proto
+
+benchmarks_protoc_inputs_proto2_message3 =                                 \
+	datasets/google_message3/benchmark_message3.proto                        \
+	datasets/google_message3/benchmark_message3_1.proto                      \
+	datasets/google_message3/benchmark_message3_2.proto                      \
+	datasets/google_message3/benchmark_message3_3.proto                      \
+	datasets/google_message3/benchmark_message3_4.proto                      \
+	datasets/google_message3/benchmark_message3_5.proto                      \
+	datasets/google_message3/benchmark_message3_6.proto                      \
+	datasets/google_message3/benchmark_message3_7.proto                      \
+	datasets/google_message3/benchmark_message3_8.proto
+
+benchmarks_protoc_inputs_proto2_message4 =                                 \
+	datasets/google_message4/benchmark_message4.proto                        \
+	datasets/google_message4/benchmark_message4_1.proto                      \
+	datasets/google_message4/benchmark_message4_2.proto                      \
+	datasets/google_message4/benchmark_message4_3.proto
+
+if USE_EXTERNAL_PROTOC
+
+# Generate the Go sources for the proto3 benchmark message and for the
+# benchmarks.proto wrapper with the externally supplied protoc. The two input
+# lists are compiled by separate protoc invocations.
+#
+# The output directory is ./tmp: no shell variable named oldpwd is set in this
+# recipe, so "$$oldpwd/tmp" would expand to "/tmp". The stamp that is touched
+# is this rule's own target, so make can tell when the Go step is up to date
+# and the C++/Java protoc_middleman stamp is left alone.
+go_protoc_middleman: make_tmp_dir $(benchmarks_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_benchmark_wrapper)
+	touch go_protoc_middleman
+
+# Generate the Go sources for the proto2 benchmark messages with the
+# externally supplied protoc, one protoc invocation per dataset input list.
+#
+# The output directory is ./tmp: no shell variable named oldpwd is set in this
+# recipe, so "$$oldpwd/tmp" would expand to "/tmp". The stamp that is touched
+# is this rule's own target rather than the C++/Java protoc_middleman2 stamp.
+go_protoc_middleman2: make_tmp_dir $(benchmarks_protoc_inputs_proto2_message1) $(benchmarks_protoc_inputs_proto2_message2) $(benchmarks_protoc_inputs_proto2_message3) $(benchmarks_protoc_inputs_proto2_message4)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message1)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message2)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message3)
+	$(PROTOC) -I$(srcdir) -I$(top_srcdir) --go_out=./tmp $(benchmarks_protoc_inputs_proto2_message4)
+	touch go_protoc_middleman2
+
+else
+
+# We have to cd to $(srcdir) before executing protoc because $(protoc_inputs) is
+# relative to srcdir, which may not be the same as the current directory when
+# building out-of-tree.
+# Generate the Go sources for the proto3 benchmark message and for the
+# benchmarks.proto wrapper with the protoc built in this tree. The wrapper is
+# listed as a prerequisite because the second command compiles it. The stamp
+# that is touched is this rule's own target, so the C++/Java protoc_middleman
+# stamp is not refreshed by the Go step.
+go_protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) $(benchmarks_protoc_inputs_benchmark_wrapper)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_benchmark_wrapper) )
+	touch go_protoc_middleman
+
+# Generate the Go sources for the proto2 benchmark messages with the protoc
+# built in this tree, one protoc invocation per dataset input list. The stamp
+# that is touched is this rule's own target rather than the C++/Java
+# protoc_middleman2 stamp.
+go_protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2_message1) $(benchmarks_protoc_inputs_proto2_message2) $(benchmarks_protoc_inputs_proto2_message3) $(benchmarks_protoc_inputs_proto2_message4) $(well_known_type_protoc_inputs)
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message1) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message2) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message3) )
+	oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --go_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2_message4) )
+	touch go_protoc_middleman2
+
+endif
+
+# Write the go-benchmark shortcut script. The generated script moves the *.cc
+# files of the current directory into tmp_cc, runs `go test -bench=.` with the
+# script's own arguments forwarded after `--`, then moves the *.cc files back
+# and removes tmp_cc.
+go-benchmark: go_protoc_middleman go_protoc_middleman2
+	@echo "Writing shortcut script $@..."
+	@echo '#! /bin/sh' > $@
+	@echo 'mkdir tmp_cc && mv *.cc tmp_cc' >> $@
+	@echo 'go test -bench=. -- $$@' >> $@
+	@echo 'mv tmp_cc/* . && rm -rf tmp_cc' >> $@
+	@chmod +x $@
+
+# Run the Go benchmarks over every dataset file matched by $(all_data).
+go: go_protoc_middleman go_protoc_middleman2 go-benchmark
+	./go-benchmark $(all_data)
+
+############# GO RULES END ##############
+
+
 MAINTAINERCLEANFILES =                                                     \
 	Makefile.in

@@ -241,7 +315,10 @@ CLEANFILES =                                                               \
 	python_cpp_proto_library                                                 \
 	python-pure-python-benchmark                                             \
 	python-cpp-reflection-benchmark                                          \
-	python-cpp-generated-code-benchmark
+	python-cpp-generated-code-benchmark                                      \
+	go-benchmark                                                             \
+	go_protoc_middleman                                                      \
+	go_protoc_middleman2

 clean-local:
 	-rm -rf tmp/*
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -36,6 +36,21 @@ $ sudo apt-get install python3-dev
 ```
 And you also need to make sure `pkg-config` is installed.

+### Go
+Go protobufs are maintained at
+[github.com/golang/protobuf](http://github.com/golang/protobuf). If you have
+not done so already, you need to install the Go toolchain and the
+`protoc-gen-go` plugin for protoc.
+
+To install protoc-gen-go, run:
+
+```
+$ go get -u github.com/golang/protobuf/protoc-gen-go
+$ export PATH=$PATH:$(go env GOPATH)/bin
+```
+
+The first command installs `protoc-gen-go` into the `bin` directory in your local `GOPATH`.
+The second command adds the `bin` directory to your `PATH` so that `protoc` can locate the plugin later.
+
 ### Big data

 There's some optional big testing data which is not included in the directory
@@ -87,6 +102,11 @@ $ make python-cpp-reflection
 $ make python-cpp-generated-code
 ```

+### Go
+```
+$ make go
+```
+
 To run a specific dataset:

 ### Java:
@@ -126,6 +146,13 @@ $ make python-cpp-generated-code-benchmark
 $ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
 ```

+### Go:
+```
+$ make go-benchmark
+$ ./go-benchmark $(specific generated dataset file name)
+```
+
+
 ## Benchmark datasets

 Each data set is in the format of benchmarks.proto:

--- a/benchmarks/cpp_benchmark.cc
+++ b/benchmarks/cpp_benchmark.cc
@@ -32,8 +32,8 @@
 #include <iostream>
 #include "benchmark/benchmark_api.h"
 #include "benchmarks.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
-#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
+#include "datasets/google_message1/proto2/benchmark_message1_proto2.pb.h"
+#include "datasets/google_message1/proto3/benchmark_message1_proto3.pb.h"
 #include "datasets/google_message2/benchmark_message2.pb.h"
 #include "datasets/google_message3/benchmark_message3.pb.h"
 #include "datasets/google_message4/benchmark_message4.pb.h"

--- a/benchmarks/datasets/google_message1/benchmark_message1_proto2.proto
+++ b/benchmarks/datasets/google_message1/benchmark_message1_proto2.proto
--- a/benchmarks/datasets/google_message1/dataset.google_message1_proto2.pb
+++ b/benchmarks/datasets/google_message1/dataset.google_message1_proto2.pb
--- a/benchmarks/datasets/google_message1/benchmark_message1_proto3.proto
+++ b/benchmarks/datasets/google_message1/benchmark_message1_proto3.proto
--- a/benchmarks/datasets/google_message1/dataset.google_message1_proto3.pb
+++ b/benchmarks/datasets/google_message1/dataset.google_message1_proto3.pb
--- a/benchmarks/go_benchmark_test.go
+++ b/benchmarks/go_benchmark_test.go
+package main
+
+import (
+	benchmarkWrapper "./tmp"
+	googleMessage1Proto2 "./tmp/datasets/google_message1/proto2"
+	googleMessage1Proto3 "./tmp/datasets/google_message1/proto3"
+	googleMessage2 "./tmp/datasets/google_message2"
+	googleMessage3 "./tmp/datasets/google_message3"
+	googleMessage4 "./tmp/datasets/google_message4"
+	"flag"
+	"github.com/golang/protobuf/proto"
+	"io/ioutil"
+	"os"
+	"testing"
+)
+
+// Dataset holds one benchmark dataset that was read from a file named on the
+// command line.
+type Dataset struct {
+	// name is the path of the dataset file; it is used as the sub-benchmark name.
+	name        string
+	// newMessage returns a new, empty message of the dataset's concrete type.
+	newMessage  func() proto.Message
+	// marshaled holds the raw payloads exactly as stored in the dataset file.
+	marshaled   [][]byte
+	// unmarshaled holds the payloads parsed into messages, in the same order
+	// as marshaled.
+	unmarshaled []proto.Message
+}
+
+// datasets lists every dataset loaded from the command-line arguments; the
+// Benchmark function iterates over it.
+var datasets []Dataset
+
+// generateNewMessageFunction returns a constructor that yields a new, empty
+// message of the concrete type named by dataset.MessageName. It panics when
+// the name is not one of the benchmark message types listed below.
+func generateNewMessageFunction(dataset benchmarkWrapper.BenchmarkDataset) func() proto.Message {
+	// Lookup table from the fully qualified message name to its constructor.
+	constructors := map[string]func() proto.Message{
+		"benchmarks.proto3.GoogleMessage1": func() proto.Message {
+			return new(googleMessage1Proto3.GoogleMessage1)
+		},
+		"benchmarks.proto2.GoogleMessage1": func() proto.Message {
+			return new(googleMessage1Proto2.GoogleMessage1)
+		},
+		"benchmarks.proto2.GoogleMessage2": func() proto.Message {
+			return new(googleMessage2.GoogleMessage2)
+		},
+		"benchmarks.google_message3.GoogleMessage3": func() proto.Message {
+			return new(googleMessage3.GoogleMessage3)
+		},
+		"benchmarks.google_message4.GoogleMessage4": func() proto.Message {
+			return new(googleMessage4.GoogleMessage4)
+		},
+	}
+	if construct, known := constructors[dataset.MessageName]; known {
+		return construct
+	}
+	panic("Unknown message type: " + dataset.MessageName)
+}
+
+// TestMain loads every dataset file named on the command line into datasets
+// and then runs the benchmarks.
+//
+// The loading happens here rather than in a package init function because
+// flag.Parse must not run before the testing package has registered its own
+// flags. Since Go 1.13 that registration happens after package
+// initialisation, so parsing during init rejects flags such as -test.bench.
+// By the time TestMain is called the test flags are registered.
+//
+// It panics if a file cannot be read, if a file is not a valid
+// BenchmarkDataset, if the message type is unknown, or if a payload cannot be
+// parsed as that message type.
+func TestMain(m *testing.M) {
+	flag.Parse()
+	for _, f := range flag.Args() {
+		// Load the benchmark.
+		b, err := ioutil.ReadFile(f)
+		if err != nil {
+			panic(err)
+		}
+
+		// Parse the benchmark.
+		var dm benchmarkWrapper.BenchmarkDataset
+		if err := proto.Unmarshal(b, &dm); err != nil {
+			panic(err)
+		}
+
+		// Determine the concrete protobuf message type to use.
+		var ds Dataset
+		ds.newMessage = generateNewMessageFunction(dm)
+
+		// Keep each payload both in raw form and parsed, so the marshal-side
+		// benchmarks do not pay for parsing.
+		for _, payload := range dm.Payload {
+			ds.marshaled = append(ds.marshaled, payload)
+			m := ds.newMessage()
+			if err := proto.Unmarshal(payload, m); err != nil {
+				panic(err)
+			}
+			ds.unmarshaled = append(ds.unmarshaled, m)
+		}
+		ds.name = f
+
+		datasets = append(datasets, ds)
+	}
+
+	// Explicit exit keeps the exit status correct on Go versions older than
+	// 1.15, where returning from TestMain does not propagate m.Run's result.
+	os.Exit(m.Run())
+}
+
+// Benchmark runs the Unmarshal, Marshal, Size, Clone and Merge sub-benchmarks,
+// in that order, for every entry of datasets. Each dataset gets its own
+// sub-benchmark group named after the dataset's name field.
+func Benchmark(b *testing.B) {
+	for idx := range datasets {
+		ds := datasets[idx]
+
+		// Ordered table of the operations measured for this dataset.
+		steps := []struct {
+			label string
+			body  func(b *testing.B)
+		}{
+			{"Unmarshal", func(b *testing.B) {
+				for iter := 0; iter < b.N; iter++ {
+					for n, raw := range ds.marshaled {
+						out := ds.newMessage()
+						if err := proto.Unmarshal(raw, out); err != nil {
+							b.Fatalf("can't unmarshal message %d %v", n, err)
+						}
+					}
+				}
+			}},
+			{"Marshal", func(b *testing.B) {
+				for iter := 0; iter < b.N; iter++ {
+					for n, msg := range ds.unmarshaled {
+						if _, err := proto.Marshal(msg); err != nil {
+							b.Fatalf("can't marshal message %d %+v: %v", n, msg, err)
+						}
+					}
+				}
+			}},
+			{"Size", func(b *testing.B) {
+				for iter := 0; iter < b.N; iter++ {
+					for _, msg := range ds.unmarshaled {
+						proto.Size(msg)
+					}
+				}
+			}},
+			{"Clone", func(b *testing.B) {
+				for iter := 0; iter < b.N; iter++ {
+					for _, msg := range ds.unmarshaled {
+						proto.Clone(msg)
+					}
+				}
+			}},
+			{"Merge", func(b *testing.B) {
+				for iter := 0; iter < b.N; iter++ {
+					for _, msg := range ds.unmarshaled {
+						out := ds.newMessage()
+						proto.Merge(out, msg)
+					}
+				}
+			}},
+		}
+
+		b.Run(ds.name, func(b *testing.B) {
+			for _, step := range steps {
+				b.Run(step.label, step.body)
+			}
+		})
+	}
+}
--- a/benchmarks/py_benchmark.py
+++ b/benchmarks/py_benchmark.py
@@ -17,8 +17,8 @@ elif sys.argv[1] != "false":
  raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
 # END CPP GENERATED MESSAGE

-import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
-import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
+import datasets.google_message1.proto2.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
+import datasets.google_message1.proto3.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
 import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
 import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
 import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2