Commit 2fc69b15 authored by Yilun Chong's avatar Yilun Chong

Add python benchmark

parent 43caa38d
...@@ -39,11 +39,11 @@ else ...@@ -39,11 +39,11 @@ else
# relative to srcdir, which may not be the same as the current directory when # relative to srcdir, which may not be the same as the current directory when
# building out-of-tree. # building out-of-tree.
protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs) protoc_middleman: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs) ) oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs) )
touch protoc_middleman touch protoc_middleman
protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs) protoc_middleman2: make_tmp_dir $(top_srcdir)/src/protoc$(EXEEXT) $(benchmarks_protoc_inputs_proto2) $(well_known_type_protoc_inputs)
oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java $(benchmarks_protoc_inputs_proto2) ) oldpwd=`pwd` && ( cd $(srcdir) && $$oldpwd/../src/protoc$(EXEEXT) -I. -I$(top_srcdir)/src --cpp_out=$$oldpwd --java_out=$$oldpwd/tmp/java/src/main/java --python_out=$$oldpwd/tmp $(benchmarks_protoc_inputs_proto2) )
touch protoc_middleman2 touch protoc_middleman2
endif endif
...@@ -155,6 +155,75 @@ java: protoc_middleman protoc_middleman2 java-benchmark ...@@ -155,6 +155,75 @@ java: protoc_middleman protoc_middleman2 java-benchmark
############# JAVA RULES END ############## ############# JAVA RULES END ##############
############# PYTHON RULES ##############
python_add_init: protoc_middleman protoc_middleman2
all_file=`find tmp -type f -regex '.*\.py'` && \
for file in $${all_file[@]}; do \
path="$${file%/*}"; \
while true; do \
touch "$$path/__init__.py" && chmod +x "$$path/__init__.py"; \
if [[ $$path != *"/"* ]]; then break; fi; \
path=$${path%/*}; \
done \
done
python_cpp_pkg_flags = `pkg-config --cflags --libs python`
lib_LTLIBRARIES = libbenchmark_messages.la
libbenchmark_messages_la_SOURCES = python_benchmark_messages.cc
libbenchmark_messages_la_LIBADD = $(top_srcdir)/src/.libs/libprotobuf.la
libbenchmark_messages_la_LDFLAGS = -version-info 1:0:0 -export-dynamic
libbenchmark_messages_la_CPPFLAGS = -I$(top_srcdir)/src -I$(srcdir) $(python_cpp_pkg_flags)
libbenchmark_messages_la-libbenchmark_messages_la.$(OBJEXT): $(benchmarks_protoc_outputs_header) $(benchmarks_protoc_outputs_proto2_header) $(benchmarks_protoc_outputs) $(benchmarks_protoc_outputs_proto2)
nodist_libbenchmark_messages_la_SOURCES = \
$(benchmarks_protoc_outputs) \
$(benchmarks_protoc_outputs_proto2) \
$(benchmarks_protoc_outputs_proto2_header) \
$(benchmarks_protoc_outputs_header)
python-pure-python-benchmark: python_add_init
@echo "Writing shortcut script python-pure-python-benchmark..."
@echo '#! /bin/sh' > python-pure-python-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-pure-python-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'python\' >> python-pure-python-benchmark
@echo cp py_benchmark.py tmp >> python-pure-python-benchmark
@echo python tmp/py_benchmark.py false '$$@' >> python-pure-python-benchmark
@chmod +x python-pure-python-benchmark
python-cpp-reflection-benchmark: python_add_init
@echo "Writing shortcut script python-cpp-reflection-benchmark..."
@echo '#! /bin/sh' > python-cpp-reflection-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-reflection-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-reflection-benchmark
@echo cp py_benchmark.py tmp >> python-cpp-reflection-benchmark
@echo python tmp/py_benchmark.py false '$$@' >> python-cpp-reflection-benchmark
@chmod +x python-cpp-reflection-benchmark
python-cpp-generated-code-benchmark: python_add_init libbenchmark_messages.la
@echo "Writing shortcut script python-cpp-generated-code-benchmark..."
@echo '#! /bin/sh' > python-cpp-generated-code-benchmark
@echo export LD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
@echo export DYLD_LIBRARY_PATH=$(top_srcdir)/src/libprotobuf.la >> python-cpp-generated-code-benchmark
@echo export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=\'cpp\' >> python-cpp-generated-code-benchmark
@echo cp py_benchmark.py tmp >> python-cpp-generated-code-benchmark
@echo python tmp/py_benchmark.py true '$$@' >> python-cpp-generated-code-benchmark
@chmod +x python-cpp-generated-code-benchmark
python-pure-python: python-pure-python-benchmark
./python-pure-python-benchmark $(all_data)
python-cpp-reflection: python-cpp-reflection-benchmark
./python-cpp-reflection-benchmark $(all_data)
python-cpp-generated-code: python-cpp-generated-code-benchmark
./python-cpp-generated-code-benchmark $(all_data)
############# PYTHON RULES END ##############
MAINTAINERCLEANFILES = \ MAINTAINERCLEANFILES = \
Makefile.in Makefile.in
...@@ -168,7 +237,11 @@ CLEANFILES = \ ...@@ -168,7 +237,11 @@ CLEANFILES = \
protoc_middleman \ protoc_middleman \
protoc_middleman2 \ protoc_middleman2 \
javac_middleman \ javac_middleman \
java-benchmark java-benchmark \
python_cpp_proto_library \
python-pure-python-benchmark \
python-cpp-reflection-benchmark \
python-cpp-generated-code-benchmark
clean-local: clean-local:
-rm -rf tmp/* -rm -rf tmp/*
...@@ -17,12 +17,25 @@ We are using [google/benchmark](https://github.com/google/benchmark) as the ...@@ -17,12 +17,25 @@ We are using [google/benchmark](https://github.com/google/benchmark) as the
benchmark tool for testing cpp. This will be automaticly made during build the benchmark tool for testing cpp. This will be automaticly made during build the
cpp benchmark. cpp benchmark.
### JAVA ### Java
We're using maven to build the java benchmarks, which is the same as to build We're using maven to build the java benchmarks, which is the same as to build
the Java protobuf. There're no other tools need to install. We're using the Java protobuf. There're no other tools need to install. We're using
[google/caliper](https://github.com/google/caliper) as benchmark tool, which [google/caliper](https://github.com/google/caliper) as benchmark tool, which
can be automaticly included by maven. can be automaticly included by maven.
### Python
We're using python C++ API for testing the generated
CPP proto version of python protobuf, which is also a prerequisite for Python
protobuf cpp implementation. You need to install the correct version of Python
C++ extension package before run generated CPP proto version of Python
protobuf's benchmark. e.g. under Ubuntu, you need to
```
$ sudo apt-get install python-dev
$ sudo apt-get install python3-dev
```
And you also need to make sure `pkg-config` is installed.
### Big data ### Big data
There's some optional big testing data which is not included in the directory initially, you need to There's some optional big testing data which is not included in the directory initially, you need to
...@@ -38,34 +51,80 @@ After doing this the big data file will automaticly generated in the benchmark d ...@@ -38,34 +51,80 @@ After doing this the big data file will automaticly generated in the benchmark d
To run all the benchmark dataset: To run all the benchmark dataset:
For java: ### Java:
``` ```
$ make java $ make java
``` ```
For cpp: ### CPP:
``` ```
$ make cpp $ make cpp
``` ```
### Python:
We have three versions of python protobuf implementation: pure python, cpp reflection and
cpp generated code. To run these version benchmark, you need to:
#### Pure Python:
```
$ make python-pure-python
```
#### CPP reflection:
```
$ make python-cpp-reflection
```
#### CPP generated code:
```
$ make python-cpp-generated-code
```
To run a specific dataset: To run a specific dataset:
For java: ### Java:
``` ```
$ make java-benchmark $ make java-benchmark
$ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)] $ ./java-benchmark $(specific generated dataset file name) [-- $(caliper option)]
``` ```
For cpp: ### CPP:
``` ```
$ make cpp-benchmark $ make cpp-benchmark
$ ./cpp-benchmark $(specific generated dataset file name) $ ./cpp-benchmark $(specific generated dataset file name)
``` ```
### Python:
#### Pure Python:
```
$ make python-pure-python-benchmark
$ ./python-pure-python-benchmark $(specific generated dataset file name)
```
#### CPP reflection:
```
$ make python-cpp-reflection-benchmark
$ ./python-cpp-reflection-benchmark $(specific generated dataset file name)
```
#### CPP generated code:
```
$ make python-cpp-generated-code-benchmark
$ ./python-cpp-generated-code-benchmark $(specific generated dataset file name)
```
## Benchmark datasets ## Benchmark datasets
Each data set is in the format of benchmarks.proto: Each data set is in the format of benchmarks.proto:
......
import sys
import os
import timeit
import math
import fnmatch
# CPP generated code must be linked before importing the generated Python code
# for the descriptor can be found in the pool
if len(sys.argv) < 2:
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
if sys.argv[1] == "true":
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/.libs" )
import libbenchmark_messages
sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) + "/tmp" )
elif sys.argv[1] != "false":
raise IOError("Need string argument \"true\" or \"false\" for whether to use cpp generated code")
import datasets.google_message1.benchmark_message1_proto2_pb2 as benchmark_message1_proto2_pb2
import datasets.google_message1.benchmark_message1_proto3_pb2 as benchmark_message1_proto3_pb2
import datasets.google_message2.benchmark_message2_pb2 as benchmark_message2_pb2
import datasets.google_message3.benchmark_message3_pb2 as benchmark_message3_pb2
import datasets.google_message4.benchmark_message4_pb2 as benchmark_message4_pb2
import benchmarks_pb2
def run_one_test(filename):
data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
benchmark_util = Benchmark(full_iteration=len(benchmark_dataset.payload),
module="py_benchmark",
setup_method="init")
print "Message %s of dataset file %s" % \
(benchmark_dataset.message_name, filename)
benchmark_util.set_test_method("parse_from_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
benchmark_util.set_test_method("serialize_to_benchmark")
print benchmark_util.run_benchmark(setup_method_args='"%s"' % (filename))
print ""
def init(filename):
global benchmark_dataset, message_class, message_list, counter
message_list=[]
counter = 0
data = open(os.path.dirname(sys.argv[0]) + "/../" + filename).read()
benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString(data)
if benchmark_dataset.message_name == "benchmarks.proto3.GoogleMessage1":
message_class = benchmark_message1_proto3_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage1":
message_class = benchmark_message1_proto2_pb2.GoogleMessage1
elif benchmark_dataset.message_name == "benchmarks.proto2.GoogleMessage2":
message_class = benchmark_message2_pb2.GoogleMessage2
elif benchmark_dataset.message_name == "benchmarks.google_message3.GoogleMessage3":
message_class = benchmark_message3_pb2.GoogleMessage3
elif benchmark_dataset.message_name == "benchmarks.google_message4.GoogleMessage4":
message_class = benchmark_message4_pb2.GoogleMessage4
else:
raise IOError("Message %s not found!" % (benchmark_dataset.message_name))
for one_payload in benchmark_dataset.payload:
temp = message_class()
temp.ParseFromString(one_payload)
message_list.append(temp)
def parse_from_benchmark():
global counter, message_class, benchmark_dataset
m = message_class().ParseFromString(benchmark_dataset.payload[counter % len(benchmark_dataset.payload)])
counter = counter + 1
def serialize_to_benchmark():
global counter, message_list, message_class
s = message_list[counter % len(benchmark_dataset.payload)].SerializeToString()
counter = counter + 1
class Benchmark:
def __init__(self, module=None, test_method=None,
setup_method=None, full_iteration = 1):
self.full_iteration = full_iteration
self.module = module
self.test_method = test_method
self.setup_method = setup_method
def set_test_method(self, test_method):
self.test_method = test_method
def full_setup_code(self, setup_method_args=''):
setup_code = ""
setup_code += "from %s import %s\n" % (self.module, self.test_method)
setup_code += "from %s import %s\n" % (self.module, self.setup_method)
setup_code += "%s(%s)\n" % (self.setup_method, setup_method_args)
return setup_code
def dry_run(self, test_method_args='', setup_method_args=''):
return timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=self.full_iteration);
def run_benchmark(self, test_method_args='', setup_method_args=''):
reps = self.full_iteration;
t = self.dry_run(test_method_args, setup_method_args);
if t < 3 :
reps = int(math.ceil(3 / t)) * self.full_iteration
t = timeit.timeit(stmt="%s(%s)" % (self.test_method, test_method_args),
setup=self.full_setup_code(setup_method_args),
number=reps);
return "Average time for %s: %.2f ns" % \
(self.test_method, 1.0 * t / reps * (10 ** 9))
if __name__ == "__main__":
for i in range(2, len(sys.argv)):
run_one_test(sys.argv[i])
#include <Python.h>
#include "benchmarks.pb.h"
#include "datasets/google_message1/benchmark_message1_proto2.pb.h"
#include "datasets/google_message1/benchmark_message1_proto3.pb.h"
#include "datasets/google_message2/benchmark_message2.pb.h"
#include "datasets/google_message3/benchmark_message3.pb.h"
#include "datasets/google_message4/benchmark_message4.pb.h"
static PyMethodDef python_benchmark_methods[] = {
{NULL, NULL, 0, NULL} /* Sentinel */
};
PyMODINIT_FUNC
initlibbenchmark_messages() {
benchmarks::BenchmarkDataset().descriptor();
benchmarks::proto3::GoogleMessage1().descriptor();
benchmarks::proto2::GoogleMessage1().descriptor();
benchmarks::proto2::GoogleMessage2().descriptor();
benchmarks::google_message3::GoogleMessage3().descriptor();
benchmarks::google_message4::GoogleMessage4().descriptor();
PyObject *m;
m = Py_InitModule("libbenchmark_messages", python_benchmark_methods);
if (m == NULL)
return;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment