Unverified Commit 3becc728 authored by Ge Jun's avatar Ge Jun Committed by GitHub

Merge pull request #587 from zyearn/prometheus

Support Prometheus output
parents b5f48669 d863394a
......@@ -89,3 +89,7 @@ process_username : "gejun"
![img](../images/bvar_noah2.png)
![img](../images/bvar_noah3.png)
# bvar导出到其它监控系统格式
bvar已支持的其它监控系统格式有[Prometheus](https://prometheus.io)。将Prometheus的抓取url地址的路径设置为`/metrics`即可,例如brpc server跑在本机的8080端口,则抓取url配置为`127.0.0.1:8080/metrics`
......@@ -89,3 +89,7 @@ The monitoring system should combine data on every single machine periodically a
![img](../images/bvar_noah2.png)
![img](../images/bvar_noah3.png)
# Dump to the formats of other monitoring systems
Currently the only other monitoring system format supported by bvar is [Prometheus](https://prometheus.io). All you need to do is set the path in the scraping target url to `/metrics`. For example, if the brpc server is running on localhost at port 8080, the scraping target should be `127.0.0.1:8080/metrics`.
......@@ -14,7 +14,6 @@
// Authors: Ge,Jun (gejun@baidu.com)
#include <vector> // std::vector
#include "brpc/controller.h" // Controller
#include "brpc/server.h" // Server
#include "brpc/closure_guard.h" // ClosureGuard
......
// Copyright (c) 2018 Bilibili, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Authors: Jiashun Zhu(zhujiashun@bilibili.com)
#include <vector>
#include <iomanip>
#include <map>
#include "brpc/controller.h" // Controller
#include "brpc/server.h" // Server
#include "brpc/closure_guard.h" // ClosureGuard
#include "brpc/builtin/prometheus_metrics_service.h"
#include "brpc/builtin/common.h"
#include "bvar/bvar.h"
// Declarations of three int32 flags living in namespace bvar (presumably
// defined by the bvar library -- confirm). This file reads them to build
// the "_latency_<p>" name suffixes and the quantile labels of summaries.
namespace bvar {
DECLARE_int32(bvar_latency_p1);
DECLARE_int32(bvar_latency_p2);
DECLARE_int32(bvar_latency_p3);
}
namespace brpc {
// A bvar::Dumper that converts bvar results to prometheus output.
// The output currently contains only gauges and summaries, for two
// reasons:
//   1) Gauge and counter cannot be told apart from the name alone, and
//      a counter is just another gauge anyway.
//   2) Histogram and summary are equivalent except that histogram
//      calculates quantiles on the server side.
class PrometheusMetricsDumper : public bvar::Dumper {
public:
    // `os' receives the generated text. `server_prefix' is the name prefix
    // a bvar must carry to be considered for summary aggregation.
    explicit PrometheusMetricsDumper(butil::IOBufBuilder* os,
                                     const std::string& server_prefix)
        : _os(os)
        , _server_prefix(server_prefix) {
    }

    // Called with the name and the textual value of each dumped bvar.
    bool dump(const std::string& name, const butil::StringPiece& desc) override;

private:
    DISALLOW_COPY_AND_ASSIGN(PrometheusMetricsDumper);

    // Return true iff name ends with suffix output by LatencyRecorder.
    bool DumpLatencyRecorderSuffix(const butil::StringPiece& name,
                                   const butil::StringPiece& desc);

    // Number of bvars in a LatencyRecorder that indicate percentiles.
    static const int NPERCENTILES = 6;

    // Values gathered so far for one summary metric.
    struct SummaryItems {
        // Percentile values, indexed in the order of the suffix table
        // used by ProcessLatencyRecorderSuffix.
        std::string latency_percentiles[NPERCENTILES];
        // Value of the bvar named "<metric>_latency".
        std::string latency_avg;
        // Value of the bvar named "<metric>_count".
        std::string count;
        // Stays empty until the last percentile ("_max_latency") is seen.
        std::string metric_name;

        bool IsComplete() const { return !metric_name.empty(); }
    };

    // Record `desc' into the SummaryItems that `name' belongs to and return
    // it, or return NULL when `name' has none of the recognized suffixes.
    const SummaryItems* ProcessLatencyRecorderSuffix(const butil::StringPiece& name,
                                                     const butil::StringPiece& desc);

private:
    butil::IOBufBuilder* _os;
    const std::string _server_prefix;
    // Keyed by the bvar name with its recognized suffix removed.
    std::map<std::string, SummaryItems> _m;
};
// Emit one bvar. Values starting with a double quote are skipped, names
// handled by DumpLatencyRecorderSuffix are left to it, and everything else
// is written as a gauge. Always returns true.
bool PrometheusMetricsDumper::dump(const std::string& name,
                                   const butil::StringPiece& desc) {
    const bool is_quoted_string = !desc.empty() && desc[0] == '"';
    if (is_quoted_string) {
        // There is no need to monitor strings in prometheus.
        return true;
    }
    if (!DumpLatencyRecorderSuffix(name, desc)) {
        // Not a name with a suffix exposed by LatencyRecorder (those are
        // output as a Summary by DumpLatencyRecorderSuffix): plain gauge.
        *_os << "# HELP " << name << '\n'
             << "# TYPE " << name << " gauge" << '\n'
             << name << " " << desc << '\n';
    }
    return true;
}
// Match `name' against the suffixes handled here (the six percentile
// suffixes, "_latency" and "_count"). On a match, store `desc' into the
// SummaryItems keyed by the name without the suffix and return it.
// Returns NULL when no suffix matches.
const PrometheusMetricsDumper::SummaryItems*
PrometheusMetricsDumper::ProcessLatencyRecorderSuffix(const butil::StringPiece& name,
                                                      const butil::StringPiece& desc) {
    // Built once on first call from the percentile flags.
    static std::string latency_names[] = {
        butil::string_printf("_latency_%d", (int)bvar::FLAGS_bvar_latency_p1),
        butil::string_printf("_latency_%d", (int)bvar::FLAGS_bvar_latency_p2),
        butil::string_printf("_latency_%d", (int)bvar::FLAGS_bvar_latency_p3),
        "_latency_999", "_latency_9999", "_max_latency"
    };
    CHECK(NPERCENTILES == arraysize(latency_names));

    butil::StringPiece metric_name(name);

    // Find the first percentile suffix that `name' ends with, if any.
    int idx = 0;
    while (idx < NPERCENTILES && !metric_name.ends_with(latency_names[idx])) {
        ++idx;
    }
    if (idx < NPERCENTILES) {
        metric_name.remove_suffix(latency_names[idx].size());
        const std::string key = metric_name.as_string();
        SummaryItems& items = _m[key];
        items.latency_percentiles[idx] = desc.as_string();
        if (idx + 1 == NPERCENTILES) {
            // '_max_latency' is the last suffix name that appears in the
            // sorted bvar list, which means all related percentiles have
            // been gathered and we are ready to output a Summary.
            items.metric_name = key;
        }
        return &items;
    }

    // Average of latency in the recent window.
    static const char kAvgSuffix[] = "_latency";
    if (metric_name.ends_with(kAvgSuffix)) {
        metric_name.remove_suffix(sizeof(kAvgSuffix) - 1);
        SummaryItems& items = _m[metric_name.as_string()];
        items.latency_avg = desc.as_string();
        return &items;
    }

    static const char kCountSuffix[] = "_count";
    if (metric_name.ends_with(kCountSuffix)) {
        metric_name.remove_suffix(sizeof(kCountSuffix) - 1);
        SummaryItems& items = _m[metric_name.as_string()];
        items.count = desc.as_string();
        return &items;
    }
    return NULL;
}
// Returns false when `name' does not start with the server prefix or has no
// suffix recognized by ProcessLatencyRecorderSuffix; the caller then dumps
// it as a gauge. Otherwise returns true, and writes a whole summary once
// the gathered items are complete.
bool PrometheusMetricsDumper::DumpLatencyRecorderSuffix(
    const butil::StringPiece& name,
    const butil::StringPiece& desc) {
    if (!name.starts_with(_server_prefix)) {
        return false;
    }
    const SummaryItems* items = ProcessLatencyRecorderSuffix(name, desc);
    if (items == NULL) {
        return false;
    }
    if (items->IsComplete()) {
        const std::string& metric = items->metric_name;
        const std::string* pct = items->latency_percentiles;
        // The flags hold percentiles; dividing by 100 gives quantile labels.
        const double q1 = static_cast<double>(bvar::FLAGS_bvar_latency_p1) / 100;
        const double q2 = static_cast<double>(bvar::FLAGS_bvar_latency_p2) / 100;
        const double q3 = static_cast<double>(bvar::FLAGS_bvar_latency_p3) / 100;
        // There is no sum of latency in bvar output, just use
        // average * count as approximation.
        const long long avg = strtoll(items->latency_avg.data(), NULL, 10);
        const long long cnt = strtoll(items->count.data(), NULL, 10);

        *_os << "# HELP " << metric << '\n'
             << "# TYPE " << metric << " summary\n";
        *_os << metric << "{quantile=\"" << q1 << "\"} " << pct[0] << '\n';
        *_os << metric << "{quantile=\"" << q2 << "\"} " << pct[1] << '\n';
        *_os << metric << "{quantile=\"" << q3 << "\"} " << pct[2] << '\n';
        *_os << metric << "{quantile=\"0.999\"} " << pct[3] << '\n';
        *_os << metric << "{quantile=\"0.9999\"} " << pct[4] << '\n';
        *_os << metric << "{quantile=\"1\"} " << pct[5] << '\n';
        *_os << metric << "_sum " << avg * cnt << '\n';
        *_os << metric << "_count " << items->count << '\n';
    }
    return true;
}
// Dump all exposed bvars through PrometheusMetricsDumper and return the text
// as the response attachment with content type "text/plain". The request and
// response messages are unused. The controller is marked failed when
// dump_exposed reports a negative result.
void PrometheusMetricsService::default_method(::google::protobuf::RpcController* cntl_base,
                                              const ::brpc::MetricsRequest*,
                                              ::brpc::MetricsResponse*,
                                              ::google::protobuf::Closure* done) {
    ClosureGuard done_guard(done);
    Controller* const controller = static_cast<Controller*>(cntl_base);
    controller->http_response().set_content_type("text/plain");

    butil::IOBufBuilder builder;
    PrometheusMetricsDumper dumper(&builder, _server->ServerPrefix());
    if (bvar::Variable::dump_exposed(&dumper, NULL) < 0) {
        controller->SetFailed("Fail to dump metrics");
        return;
    }
    builder.move_to(controller->response_attachment());
}
} // namespace brpc
// Copyright (c) 2018 BiliBili, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Authors: Jiashun Zhu(zhujiashun@bilibili.com)
#ifndef BRPC_PROMETHEUS_METRICS_SERVICE_H
#define BRPC_PROMETHEUS_METRICS_SERVICE_H
#include "brpc/builtin_service.pb.h"
#include "brpc/server.h"
namespace brpc {
// Implementation of the `metrics' service that outputs bvars for Prometheus.
class PrometheusMetricsService : public metrics {
public:
    // `server' is stored as a raw pointer. This class declares no destructor,
    // so it never deletes it.
    // Marked explicit so that a Server* is never implicitly converted into
    // a service object.
    explicit PrometheusMetricsService(Server* server)
        : _server(server) {}

    // Handler of the single rpc of the `metrics' service.
    void default_method(::google::protobuf::RpcController* cntl_base,
                        const ::brpc::MetricsRequest* request,
                        ::brpc::MetricsResponse* response,
                        ::google::protobuf::Closure* done) override;

private:
    Server* _server;
};
} // namespace brpc
#endif // BRPC_PROMETHEUS_METRICS_SERVICE_H
......@@ -14,7 +14,7 @@ message FlagsRequest {}
message FlagsResponse {}
message VersionRequest {}
message VersionResponse {}
message HealthRequest{}
message HealthRequest {}
message HealthResponse {}
message StatusRequest {}
message StatusResponse {}
......@@ -42,6 +42,8 @@ message DirRequest {}
message DirResponse {}
message VLogRequest {}
message VLogResponse {}
// Empty request/response pair used by the rpc of the `metrics` service.
message MetricsRequest {}
message MetricsResponse {}
message BadMethodRequest {
required string service_name = 1;
}
......@@ -95,10 +97,15 @@ service sockets {
rpc default_method(SocketsRequest) returns (SocketsResponse);
}
// Service with a single rpc taking and returning the empty Metrics messages.
// Implemented in C++ by PrometheusMetricsService, which subclasses `metrics`.
service metrics {
rpc default_method(MetricsRequest) returns (MetricsResponse);
}
service badmethod {
rpc no_method(BadMethodRequest) returns (BadMethodResponse);
}
message ProfileRequest {}
message ProfileResponse {}
......
......@@ -64,6 +64,7 @@
#include "brpc/builtin/ids_service.h" // IdsService
#include "brpc/builtin/sockets_service.h" // SocketsService
#include "brpc/builtin/hotspots_service.h" // HotspotsService
#include "brpc/builtin/prometheus_metrics_service.h"
#include "brpc/details/method_status.h"
#include "brpc/load_balancer.h"
#include "brpc/naming_service.h"
......@@ -486,6 +487,10 @@ int Server::AddBuiltinServices() {
LOG(ERROR) << "Fail to add ListService";
return -1;
}
if (AddBuiltinService(new (std::nothrow) PrometheusMetricsService(this))) {
LOG(ERROR) << "Fail to add MetricsService";
return -1;
}
if (FLAGS_enable_threads_service &&
AddBuiltinService(new (std::nothrow) ThreadsService)) {
LOG(ERROR) << "Fail to add ThreadsService";
......
......@@ -522,6 +522,7 @@ friend class ProtobufsService;
friend class ConnectionsService;
friend class BadMethodService;
friend class ServerPrivateAccessor;
friend class PrometheusMetricsService;
friend class Controller;
int AddServiceInternal(google::protobuf::Service* service,
......
// brpc - A framework to host and access services throughout Baidu.
// Copyright (c) 2018 BiliBili, Inc.
// Author: Jiashun Zhu(zhujiashun@bilibili.com)
// Date: Tue Dec 3 11:27:18 CST 2018
#include <gtest/gtest.h>
#include "brpc/server.h"
#include "brpc/channel.h"
#include "brpc/controller.h"
#include "butil/strings/string_piece.h"
#include "echo.pb.h"
int main(int argc, char* argv[]) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
// EchoService whose Echo() does nothing with the request or the response.
// It only gives the test server a user service to host.
class DummyEchoServiceImpl : public test::EchoService {
public:
    virtual ~DummyEchoServiceImpl() {}

    virtual void Echo(google::protobuf::RpcController* cntl_base,
                      const test::EchoRequest* request,
                      test::EchoResponse* response,
                      google::protobuf::Closure* done) {
        // The guard takes `done' (presumably running it when the guard
        // goes out of scope -- confirm against brpc::ClosureGuard).
        brpc::ClosureGuard done_guard(done);
    }
};
// States of the line-by-line parser in the PrometheusMetrics sanity test.
enum STATE {
    HELP = 0,     // expecting a "# HELP <name>" line
    TYPE = 1,     // expecting a "# TYPE <name> <type>" line
    GAUGE = 2,    // expecting the single value line of a gauge
    SUMMARY = 3   // inside the value lines of a summary
};
// Start a server, fetch "/metrics" over http and check that the body is a
// sequence of well-formed entries:
//   # HELP <name>
//   # TYPE <name> gauge|summary
//   <value line(s)>
// A gauge has exactly one value line. A summary has any number of
// "quantile=" lines plus exactly one "_sum" and one "_count" line.
// At least one gauge and one summary must be present.
TEST(PrometheusMetrics, sanity) {
    brpc::Server server;
    DummyEchoServiceImpl echo_svc;
    ASSERT_EQ(0, server.AddService(&echo_svc, brpc::SERVER_DOESNT_OWN_SERVICE));
    ASSERT_EQ(0, server.Start("127.0.0.1:8614", NULL));

    brpc::Channel channel;
    brpc::ChannelOptions channel_opts;
    channel_opts.protocol = "http";
    ASSERT_EQ(0, channel.Init("127.0.0.1:8614", &channel_opts));
    brpc::Controller cntl;
    cntl.http_request().uri() = "/metrics";
    channel.CallMethod(NULL, &cntl, NULL, NULL, NULL);
    ASSERT_FALSE(cntl.Failed());
    std::string res = cntl.response_attachment().to_string();

    size_t start_pos = 0;
    size_t end_pos = 0;
    STATE state = HELP;
    // The sscanf formats below carry explicit field widths (buffer size - 1)
    // so that an unexpectedly long token cannot overflow these buffers.
    char name_help[128];
    char name_type[128];
    char type[16];
    int matched = 0;
    // Parsed as double: metric values are not guaranteed to fit in an int
    // and may be floating point.
    double value = 0;
    bool summary_sum_gathered = false;
    bool summary_count_gathered = false;
    bool has_ever_summary = false;
    bool has_ever_gauge = false;

    while ((end_pos = res.find('\n', start_pos)) != std::string::npos) {
        // Terminate the current line in place so that sscanf stops there.
        res[end_pos] = '\0'; // safe;
        const char* line = res.data() + start_pos;
        switch (state) {
        case HELP:
            matched = sscanf(line, "# HELP %127s", name_help);
            ASSERT_EQ(1, matched);
            state = TYPE;
            break;
        case TYPE:
            matched = sscanf(line, "# TYPE %127s %15s", name_type, type);
            ASSERT_EQ(2, matched);
            ASSERT_STREQ(name_type, name_help);
            if (strcmp(type, "gauge") == 0) {
                state = GAUGE;
            } else if (strcmp(type, "summary") == 0) {
                state = SUMMARY;
            } else {
                ASSERT_TRUE(false);
            }
            break;
        case GAUGE:
            matched = sscanf(line, "%127s %lf", name_type, &value);
            ASSERT_EQ(2, matched);
            ASSERT_STREQ(name_type, name_help);
            state = HELP;
            has_ever_gauge = true;
            break;
        case SUMMARY:
            if (butil::StringPiece(line, end_pos - start_pos).find("quantile=")
                    == butil::StringPiece::npos) {
                matched = sscanf(line, "%127s %lf", name_type, &value);
                ASSERT_EQ(2, matched);
                ASSERT_TRUE(strncmp(name_type, name_help, strlen(name_help)) == 0);
                if (butil::StringPiece(name_type).ends_with("_sum")) {
                    ASSERT_FALSE(summary_sum_gathered);
                    summary_sum_gathered = true;
                } else if (butil::StringPiece(name_type).ends_with("_count")) {
                    ASSERT_FALSE(summary_count_gathered);
                    summary_count_gathered = true;
                } else {
                    ASSERT_TRUE(false);
                }
                if (summary_sum_gathered && summary_count_gathered) {
                    // The summary is finished, expect a new entry next.
                    state = HELP;
                    summary_sum_gathered = false;
                    summary_count_gathered = false;
                    has_ever_summary = true;
                }
            } // else find "quantile=", just break to next line
            break;
        default:
            ASSERT_TRUE(false);
            break;
        }
        start_pos = end_pos + 1;
    }
    ASSERT_TRUE(has_ever_gauge && has_ever_summary);
    ASSERT_EQ(0, server.Stop(0));
    ASSERT_EQ(0, server.Join());
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment