Commit 3783a4ca authored by Yilun Chong

Sync internal benchmark changes

parent d2980062
from __future__ import print_function
from __future__ import absolute_import
import argparse
import os
import re
import copy
import uuid
import calendar
import time
from . import big_query_utils
import datetime
import json
# This import depends on the automake rule protoc_middleman, please make sure # This import depends on the automake rule protoc_middleman, please make sure
# protoc_middleman has been built before run this file. # protoc_middleman has been built before run this file.
import os.path, sys import json
import re
import os.path
# BEGIN OPENSOURCE
import sys
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir)) sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))
# END OPENSOURCE
import tmp.benchmarks_pb2 as benchmarks_pb2 import tmp.benchmarks_pb2 as benchmarks_pb2
from click.types import STRING
_PROJECT_ID = 'grpc-testing' __file_size_map = {}
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v1'
_NOW = "%d%02d%02d" % (datetime.datetime.now().year,
datetime.datetime.now().month,
datetime.datetime.now().day)
file_size_map = {} def __get_data_size(filename):
if filename[0] != '/':
def get_data_size(file_name): filename = os.path.dirname(os.path.abspath(__file__)) + "/../" + filename
if file_name in file_size_map: if filename in __file_size_map:
return file_size_map[file_name] return __file_size_map[filename]
benchmark_dataset = benchmarks_pb2.BenchmarkDataset() benchmark_dataset = benchmarks_pb2.BenchmarkDataset()
benchmark_dataset.ParseFromString( benchmark_dataset.ParseFromString(
open(os.path.dirname(os.path.abspath(__file__)) + "/../" + file_name).read()) open(filename).read())
size = 0 size = 0
count = 0 count = 0
for payload in benchmark_dataset.payload: for payload in benchmark_dataset.payload:
size += len(payload) size += len(payload)
count += 1 count += 1
file_size_map[file_name] = (size, 1.0 * size / count) __file_size_map[filename] = (size, 1.0 * size / count)
return size, 1.0 * size / count return size, 1.0 * size / count
def extract_file_name(file_name): def __extract_file_name(file_name):
name_list = re.split("[/\.]", file_name) name_list = re.split("[/\.]", file_name)
short_file_name = "" short_file_name = ""
for name in name_list: for name in name_list:
...@@ -50,10 +37,7 @@ def extract_file_name(file_name): ...@@ -50,10 +37,7 @@ def extract_file_name(file_name):
return short_file_name return short_file_name
cpp_result = [] __results = []
python_result = []
java_result = []
go_result = []
# CPP results example: # CPP results example:
...@@ -70,8 +54,7 @@ go_result = [] ...@@ -70,8 +54,7 @@ go_result = []
# ], # ],
# ... # ...
# ] # ]
def parse_cpp_result(filename): def __parse_cpp_result(filename):
global cpp_result
if filename == "": if filename == "":
return return
if filename[0] != '/': if filename[0] != '/':
...@@ -82,9 +65,11 @@ def parse_cpp_result(filename): ...@@ -82,9 +65,11 @@ def parse_cpp_result(filename):
data_filename = "".join( data_filename = "".join(
re.split("(_parse_|_serialize)", benchmark["name"])[0]) re.split("(_parse_|_serialize)", benchmark["name"])[0])
behavior = benchmark["name"][len(data_filename) + 1:] behavior = benchmark["name"][len(data_filename) + 1:]
cpp_result.append({ if data_filename[:2] == "BM":
data_filename = data_filename[3:]
__results.append({
"language": "cpp", "language": "cpp",
"dataFileName": data_filename, "dataFilename": data_filename,
"behavior": behavior, "behavior": behavior,
"throughput": benchmark["bytes_per_second"] / 2.0 ** 20 "throughput": benchmark["bytes_per_second"] / 2.0 ** 20
}) })
...@@ -105,8 +90,7 @@ def parse_cpp_result(filename): ...@@ -105,8 +90,7 @@ def parse_cpp_result(filename):
# ], #pure-python # ], #pure-python
# ... # ...
# ] # ]
def parse_python_result(filename): def __parse_python_result(filename):
global python_result
if filename == "": if filename == "":
return return
if filename[0] != '/': if filename[0] != '/':
...@@ -115,11 +99,11 @@ def parse_python_result(filename): ...@@ -115,11 +99,11 @@ def parse_python_result(filename):
results_list = json.loads(f.read()) results_list = json.loads(f.read())
for results in results_list: for results in results_list:
for result in results: for result in results:
_, avg_size = get_data_size(result["filename"]) _, avg_size = __get_data_size(result["filename"])
for behavior in result["benchmarks"]: for behavior in result["benchmarks"]:
python_result.append({ __results.append({
"language": "python", "language": "python",
"dataFileName": extract_file_name(result["filename"]), "dataFilename": __extract_file_name(result["filename"]),
"behavior": behavior, "behavior": behavior,
"throughput": avg_size / "throughput": avg_size /
result["benchmarks"][behavior] * 1e9 / 2 ** 20 result["benchmarks"][behavior] * 1e9 / 2 ** 20
...@@ -157,8 +141,7 @@ def parse_python_result(filename): ...@@ -157,8 +141,7 @@ def parse_python_result(filename):
# }, # },
# ... # ...
# ] # ]
def parse_java_result(filename): def __parse_java_result(filename):
global average_bytes_per_message, java_result
if filename == "": if filename == "":
return return
if filename[0] != '/': if filename[0] != '/':
...@@ -172,13 +155,13 @@ def parse_java_result(filename): ...@@ -172,13 +155,13 @@ def parse_java_result(filename):
total_weight += measurement["weight"] total_weight += measurement["weight"]
total_value += measurement["value"]["magnitude"] total_value += measurement["value"]["magnitude"]
avg_time = total_value * 1.0 / total_weight avg_time = total_value * 1.0 / total_weight
total_size, _ = get_data_size( total_size, _ = __get_data_size(
result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
java_result.append({ __results.append({
"language": "java", "language": "java",
"throughput": total_size / avg_time * 1e9 / 2 ** 20, "throughput": total_size / avg_time * 1e9 / 2 ** 20,
"behavior": result["scenario"]["benchmarkSpec"]["methodName"], "behavior": result["scenario"]["benchmarkSpec"]["methodName"],
"dataFileName": extract_file_name( "dataFilename": __extract_file_name(
result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"]) result["scenario"]["benchmarkSpec"]["parameters"]["dataFile"])
}) })
...@@ -194,8 +177,7 @@ def parse_java_result(filename): ...@@ -194,8 +177,7 @@ def parse_java_result(filename):
# Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op # Benchmark/.././datasets/google_message2/dataset.google_message2.pb/Merge-12 300 4108632 ns/op
# PASS # PASS
# ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s # ok _/usr/local/google/home/yilunchong/mygit/protobuf/benchmarks 124.173s
def parse_go_result(filename): def __parse_go_result(filename):
global go_result
if filename == "": if filename == "":
return return
if filename[0] != '/': if filename[0] != '/':
...@@ -208,85 +190,29 @@ def parse_go_result(filename): ...@@ -208,85 +190,29 @@ def parse_go_result(filename):
first_slash_index = result_list[0].find('/') first_slash_index = result_list[0].find('/')
last_slash_index = result_list[0].rfind('/') last_slash_index = result_list[0].rfind('/')
full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix full_filename = result_list[0][first_slash_index+4:last_slash_index] # delete ../ prefix
total_bytes, _ = get_data_size(full_filename) total_bytes, _ = __get_data_size(full_filename)
behavior_with_suffix = result_list[0][last_slash_index+1:] behavior_with_suffix = result_list[0][last_slash_index+1:]
last_dash = behavior_with_suffix.rfind("-") last_dash = behavior_with_suffix.rfind("-")
if last_dash == -1: if last_dash == -1:
behavior = behavior_with_suffix behavior = behavior_with_suffix
else: else:
behavior = behavior_with_suffix[:last_dash] behavior = behavior_with_suffix[:last_dash]
go_result.append({ __results.append({
"dataFilename": extract_file_name(full_filename), "dataFilename": __extract_file_name(full_filename),
"throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20, "throughput": total_bytes / float(result_list[2]) * 1e9 / 2 ** 20,
"behavior": behavior, "behavior": behavior,
"language": "go" "language": "go"
}) })
def get_result_from_file(cpp_file="", java_file="", python_file="", go_file=""):
def get_metadata(): results = {}
build_number = os.getenv('BUILD_NUMBER') if cpp_file != "":
build_url = os.getenv('BUILD_URL') __parse_cpp_result(cpp_file)
job_name = os.getenv('JOB_NAME') if java_file != "":
git_commit = os.getenv('GIT_COMMIT') __parse_java_result(java_file)
# actual commit is the actual head of PR that is getting tested if python_file != "":
git_actual_commit = os.getenv('ghprbActualCommit') __parse_python_result(python_file)
if go_file != "":
utc_timestamp = str(calendar.timegm(time.gmtime())) __parse_go_result(go_file)
metadata = {'created': utc_timestamp}
return __results
if build_number: \ No newline at end of file
metadata['buildNumber'] = build_number
if build_url:
metadata['buildUrl'] = build_url
if job_name:
metadata['jobName'] = job_name
if git_commit:
metadata['gitCommit'] = git_commit
if git_actual_commit:
metadata['gitActualCommit'] = git_actual_commit
return metadata
def upload_result(result_list, metadata):
for result in result_list:
new_result = copy.deepcopy(result)
new_result['metadata'] = metadata
bq = big_query_utils.create_big_query()
row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
_TABLE + "$" + _NOW,
[row]):
print('Error when uploading result', new_result)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("-cpp", "--cpp_input_file",
help="The CPP benchmark result file's name",
default="")
parser.add_argument("-java", "--java_input_file",
help="The Java benchmark result file's name",
default="")
parser.add_argument("-python", "--python_input_file",
help="The Python benchmark result file's name",
default="")
parser.add_argument("-go", "--go_input_file",
help="The golang benchmark result file's name",
default="")
args = parser.parse_args()
parse_cpp_result(args.cpp_input_file)
parse_python_result(args.python_input_file)
parse_java_result(args.java_input_file)
parse_go_result(args.go_input_file)
metadata = get_metadata()
print("uploading cpp results...")
upload_result(cpp_result, metadata)
print("uploading java results...")
upload_result(java_result, metadata)
print("uploading python results...")
upload_result(python_result, metadata)
print("uploading go results...")
upload_result(go_result, metadata)
from __future__ import print_function
from __future__ import absolute_import
import argparse
import os
import re
import copy
import uuid
import calendar
import time
import datetime
from util import big_query_utils
from util import result_parser
# BigQuery destination used by upload_result().
_PROJECT_ID = 'grpc-testing'
_DATASET = 'protobuf_benchmark_result'
_TABLE = 'opensource_result_v2'

# Date stamp (YYYYMMDD, local time) appended to the table name after "$".
# The clock is read exactly once so that year, month and day cannot come
# from different days when the module is imported around midnight.
_NOW = datetime.datetime.now().strftime("%Y%m%d")

# Seconds since the epoch (UTC), captured once at import time; every uploaded
# row shares this value as its "timestamp".
_INITIAL_TIME = calendar.timegm(time.gmtime())
def get_metadata():
    """Collect build metadata from the process environment.

    Returns:
        A dict that always contains 'created' (the current UTC time in
        seconds since the epoch, as a string) plus one entry for each of
        the environment variables below that is set to a non-empty value.
    """
    # (environment variable, metadata key) pairs. Variables that are unset
    # or empty are omitted from the result rather than stored as None/"".
    env_to_key = (
        ('BUILD_NUMBER', 'buildNumber'),
        ('BUILD_URL', 'buildUrl'),
        ('JOB_NAME', 'jobName'),
        ('GIT_COMMIT', 'gitCommit'),
        # actual commit is the actual head of PR that is getting tested
        ('ghprbActualCommit', 'gitActualCommit'),
    )

    metadata = {'created': str(calendar.timegm(time.gmtime()))}
    for env_name, key in env_to_key:
        value = os.getenv(env_name)
        if value:
            metadata[key] = value
    return metadata
def upload_result(result_list, metadata):
    """Upload each benchmark result as one row to BigQuery.

    Args:
        result_list: iterable of dicts; each must have a "throughput" key.
            Every key/value pair of the dict (including "throughput") is
            also flattened into the row's "labels" string.
        metadata: accepted for interface compatibility; this function does
            not currently include it in the uploaded row.

    A failed insert is reported on stdout and the remaining results are
    still attempted.
    """
    # Created on first use and then shared by all rows, so an empty
    # result_list never touches BigQuery and a non-empty one builds the
    # client only once instead of once per row.
    bq = None
    for result in result_list:
        new_result = {
            "metric": "throughput",
            "value": result["throughput"],
            "unit": "MB/s",
            "test": "protobuf_benchmark",
            "product_name": "protobuf",
            # Format: "|key:value|,|key:value|,..."
            "labels": ",".join(
                "|%s:%s|" % (key, value) for key, value in result.items()),
            "timestamp": _INITIAL_TIME,
        }
        if bq is None:
            bq = big_query_utils.create_big_query()
        row = big_query_utils.make_row(str(uuid.uuid4()), new_result)
        # The "$<date>" suffix selects the table partition/shard for today.
        if not big_query_utils.insert_rows(bq, _PROJECT_ID, _DATASET,
                                           _TABLE + "$" + _NOW,
                                           [row]):
            print('Error when uploading result', new_result)
if __name__ == "__main__":
    # Command-line entry point: parse the per-language benchmark result
    # files named on the command line and upload the combined results.
    # Every flag defaults to "", which is passed through unchanged to
    # result_parser.get_result_from_file.
    parser = argparse.ArgumentParser()
    parser.add_argument("-cpp", "--cpp_input_file",
                        help="The CPP benchmark result file's name",
                        default="")
    parser.add_argument("-java", "--java_input_file",
                        help="The Java benchmark result file's name",
                        default="")
    parser.add_argument("-python", "--python_input_file",
                        help="The Python benchmark result file's name",
                        default="")
    parser.add_argument("-go", "--go_input_file",
                        help="The golang benchmark result file's name",
                        default="")
    args = parser.parse_args()

    metadata = get_metadata()
    print("uploading results...")
    upload_result(result_parser.get_result_from_file(
        cpp_file=args.cpp_input_file,
        java_file=args.java_input_file,
        python_file=args.python_input_file,
        go_file=args.go_input_file
    ), metadata)
\ No newline at end of file
...@@ -86,7 +86,7 @@ echo "benchmarking java..." ...@@ -86,7 +86,7 @@ echo "benchmarking java..."
# upload result to bq # upload result to bq
make python_add_init make python_add_init
env LD_LIBRARY_PATH="$oldpwd/src/.libs" python util/run_and_upload.py -cpp="../tmp/cpp_result.json" -java="../tmp/java_result.json" \ env LD_LIBRARY_PATH="$oldpwd/src/.libs" python -m util.result_uploader -cpp="../tmp/cpp_result.json" -java="../tmp/java_result.json" \
-python="../tmp/python_result.json" -go="../tmp/go_result.txt" -python="../tmp/python_result.json" -go="../tmp/go_result.txt"
cd $oldpwd cd $oldpwd
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment