Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
B
brpc
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
brpc
Commits
2685cd8c
Unverified
Commit
2685cd8c
authored
Nov 09, 2018
by
Ge Jun
Committed by
GitHub
Nov 09, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #531 from TousakaRin/circuit_breaker
Exposing the status of CircuitBreaker to the builtIn page
parents
11e9156e
1737f164
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
107 additions
and
37 deletions
+107
-37
connections_service.cpp
src/brpc/builtin/connections_service.cpp
+16
-10
circuit_breaker.cpp
src/brpc/circuit_breaker.cpp
+21
-10
circuit_breaker.h
src/brpc/circuit_breaker.h
+15
-4
controller.cpp
src/brpc/controller.cpp
+17
-10
socket.cpp
src/brpc/socket.cpp
+32
-3
socket.h
src/brpc/socket.h
+6
-0
No files found.
src/brpc/builtin/connections_service.cpp
View file @
2685cd8c
...
...
@@ -108,7 +108,7 @@ static std::string BriefName(const std::string& cname) {
void
ConnectionsService
::
PrintConnections
(
std
::
ostream
&
os
,
const
std
::
vector
<
SocketId
>&
conns
,
bool
use_html
,
const
Server
*
server
,
bool
need_local
)
const
{
bool
use_html
,
const
Server
*
server
,
bool
is_channel_conn
)
const
{
if
(
conns
.
empty
())
{
return
;
}
...
...
@@ -116,8 +116,10 @@ void ConnectionsService::PrintConnections(
os
<<
"<table class=
\"
gridtable sortable
\"
border=
\"
1
\"
><tr>"
"<th>CreatedTime</th>"
"<th>RemoteSide</th>"
;
if
(
need_local
)
{
os
<<
"<th>Local</th>"
;
if
(
is_channel_conn
)
{
os
<<
"<th>Local</th>"
"<th>RecentErr</th>"
"<th>nbreak</th>"
;
}
os
<<
"<th>SSL</th>"
"<th>Protocol</th>"
...
...
@@ -135,8 +137,8 @@ void ConnectionsService::PrintConnections(
"</tr>
\n
"
;
}
else
{
os
<<
"CreatedTime |RemoteSide |"
;
if
(
need_local
)
{
os
<<
"Local|"
;
if
(
is_channel_conn
)
{
os
<<
"Local|
RecentErr|nbreak|
"
;
}
os
<<
"SSL|Protocol |fd |"
"InBytes/s|In/s |InBytes/m |In/m |"
...
...
@@ -171,8 +173,10 @@ void ConnectionsService::PrintConnections(
if
(
failed
)
{
os
<<
min_width
(
"Broken"
,
26
)
<<
bar
<<
min_width
(
NameOfPoint
(
ptr
->
remote_side
()),
19
)
<<
bar
;
if
(
need_local
)
{
os
<<
min_width
(
ptr
->
local_side
().
port
,
5
)
<<
bar
;
if
(
is_channel_conn
)
{
os
<<
min_width
(
ptr
->
local_side
().
port
,
5
)
<<
bar
<<
min_width
(
ptr
->
recent_error_count
(),
10
)
<<
bar
<<
min_width
(
ptr
->
isolated_times
(),
7
)
<<
bar
;
}
os
<<
min_width
(
"-"
,
3
)
<<
bar
<<
min_width
(
"-"
,
12
)
<<
bar
...
...
@@ -267,12 +271,14 @@ void ConnectionsService::PrintConnections(
strcpy
(
rtt_display
,
"-"
);
}
os
<<
bar
<<
min_width
(
NameOfPoint
(
ptr
->
remote_side
()),
19
)
<<
bar
;
if
(
need_local
)
{
if
(
is_channel_conn
)
{
if
(
ptr
->
local_side
().
port
>
0
)
{
os
<<
min_width
(
ptr
->
local_side
().
port
,
5
)
<<
bar
;
}
else
{
os
<<
min_width
(
"-"
,
5
)
<<
bar
;
}
os
<<
min_width
(
ptr
->
recent_error_count
(),
10
)
<<
bar
<<
min_width
(
ptr
->
isolated_times
(),
7
)
<<
bar
;
}
os
<<
SSLStateToYesNo
(
ptr
->
ssl_state
(),
use_html
)
<<
bar
;
char
protname
[
32
];
...
...
@@ -367,7 +373,7 @@ void ConnectionsService::default_method(
conns
.
insert
(
conns
.
end
(),
internal_conns
.
begin
(),
internal_conns
.
end
());
}
os
<<
"server_connection_count: "
<<
num_conns
<<
'\n'
;
PrintConnections
(
os
,
conns
,
use_html
,
server
,
false
/*
need_local
*/
);
PrintConnections
(
os
,
conns
,
use_html
,
server
,
false
/*
is_channel_conn
*/
);
if
(
has_uncopied
)
{
// Notice that we don't put the link of givemeall directly because
// people seeing the link are very likely to click it which may be
...
...
@@ -380,7 +386,7 @@ void ConnectionsService::default_method(
SocketMapList
(
&
conns
);
os
<<
(
use_html
?
"<br>
\n
"
:
"
\n
"
)
<<
"channel_connection_count: "
<<
GetChannelConnectionCount
()
<<
'\n'
;
PrintConnections
(
os
,
conns
,
use_html
,
server
,
true
/*
need_local
*/
);
PrintConnections
(
os
,
conns
,
use_html
,
server
,
true
/*
is_channel_conn
*/
);
if
(
use_html
)
{
os
<<
"</body></html>
\n
"
;
...
...
src/brpc/circuit_breaker.cpp
View file @
2685cd8c
...
...
@@ -21,9 +21,9 @@
namespace
brpc
{
DEFINE_int32
(
circuit_breaker_short_window_size
,
500
,
DEFINE_int32
(
circuit_breaker_short_window_size
,
1
500
,
"Short window sample size."
);
DEFINE_int32
(
circuit_breaker_long_window_size
,
1
000
,
DEFINE_int32
(
circuit_breaker_long_window_size
,
3
000
,
"Long window sample size."
);
DEFINE_int32
(
circuit_breaker_short_window_error_percent
,
10
,
"The maximum error rate allowed by the short window, ranging from 0-99."
);
...
...
@@ -39,6 +39,8 @@ DEFINE_int32(circuit_breaker_min_isolation_duration_ms, 100,
"Minimum isolation duration in milliseconds"
);
DEFINE_int32
(
circuit_breaker_max_isolation_duration_ms
,
30000
,
"Maximum isolation duration in milliseconds"
);
DEFINE_double
(
circuit_breaker_epsilon_value
,
0.02
,
"ema_alpha = 1 - std::pow(epsilon, 1.0 / window_size)"
);
namespace
{
// EPSILON is used to generate the smoothing coefficient when calculating EMA.
...
...
@@ -51,7 +53,9 @@ namespace {
// when window_size = 1000,
// EPSILON = 0.1, smooth = 0.9977
// EPSILON = 0.3, smooth = 0.9987
const
double
EPSILON
=
0.1
;
#define EPSILON (FLAGS_circuit_breaker_epsilon_value)
}
// namepace
CircuitBreaker
::
EmaErrorRecorder
::
EmaErrorRecorder
(
int
window_size
,
...
...
@@ -115,8 +119,8 @@ bool CircuitBreaker::EmaErrorRecorder::UpdateErrorCost(int64_t error_cost,
int64_t
ema_error_cost
=
_ema_error_cost
.
fetch_add
(
error_cost
,
butil
::
memory_order_relaxed
);
ema_error_cost
+=
error_cost
;
int64_t
max_error_cost
=
ema_latency
*
_window_size
*
(
_max_error_percent
/
100.0
)
*
(
1.0
+
EPSILON
);
const
int64_t
max_error_cost
=
ema_latency
*
_window_size
*
(
_max_error_percent
/
100.0
)
*
(
1.0
+
EPSILON
);
return
ema_error_cost
<=
max_error_cost
;
}
...
...
@@ -147,8 +151,9 @@ CircuitBreaker::CircuitBreaker()
,
_short_window
(
FLAGS_circuit_breaker_short_window_size
,
FLAGS_circuit_breaker_short_window_error_percent
)
,
_last_reset_time_ms
(
butil
::
cpuwide_time_ms
())
,
_broken
(
false
)
,
_isolation_duration_ms
(
FLAGS_circuit_breaker_min_isolation_duration_ms
)
{
,
_isolation_duration_ms
(
FLAGS_circuit_breaker_min_isolation_duration_ms
)
,
_isolated_times
(
0
)
,
_broken
(
false
)
{
}
bool
CircuitBreaker
::
OnCallEnd
(
int
error_code
,
int64_t
latency
)
{
...
...
@@ -159,9 +164,7 @@ bool CircuitBreaker::OnCallEnd(int error_code, int64_t latency) {
_short_window
.
OnCallEnd
(
error_code
,
latency
))
{
return
true
;
}
if
(
!
_broken
.
exchange
(
true
,
butil
::
memory_order_acquire
))
{
UpdateIsolationDuration
();
}
MarkAsBroken
();
return
false
;
}
...
...
@@ -172,6 +175,13 @@ void CircuitBreaker::Reset() {
_broken
.
store
(
false
,
butil
::
memory_order_release
);
}
void
CircuitBreaker
::
MarkAsBroken
()
{
if
(
!
_broken
.
exchange
(
true
,
butil
::
memory_order_acquire
))
{
_isolated_times
.
fetch_add
(
1
,
butil
::
memory_order_relaxed
);
UpdateIsolationDuration
();
}
}
void
CircuitBreaker
::
UpdateIsolationDuration
()
{
int64_t
now_time_ms
=
butil
::
cpuwide_time_ms
();
int
isolation_duration_ms
=
_isolation_duration_ms
.
load
(
butil
::
memory_order_relaxed
);
...
...
@@ -188,4 +198,5 @@ void CircuitBreaker::UpdateIsolationDuration() {
_isolation_duration_ms
.
store
(
isolation_duration_ms
,
butil
::
memory_order_relaxed
);
}
}
// namespace brpc
src/brpc/circuit_breaker.h
View file @
2685cd8c
...
...
@@ -38,12 +38,22 @@ public:
// Reset CircuitBreaker and clear history data. will erase the historical
// data and start sampling again. Before you call this method, you need to
// ensure that no one else is
calling OnCallEnd
.
// ensure that no one else is
accessing CircuitBreaker
.
void
Reset
();
// Mark the Socket as broken. Call this method when you want to isolate a
// node in advance. When this method is called multiple times in succession,
// only the first call will take effect.
void
MarkAsBroken
();
// Number of times marked as broken
int
isolated_times
()
const
{
return
_isolated_times
.
load
(
butil
::
memory_order_relaxed
);
}
// The duration that should be isolated when the socket fails in milliseconds.
// The higher the frequency of socket errors, the longer the duration.
int
isolation_duration_ms
()
{
int
isolation_duration_ms
()
const
{
return
_isolation_duration_ms
.
load
(
butil
::
memory_order_relaxed
);
}
...
...
@@ -55,7 +65,7 @@ private:
EmaErrorRecorder
(
int
windows_size
,
int
max_error_percent
);
bool
OnCallEnd
(
int
error_code
,
int64_t
latency
);
void
Reset
();
private
:
int64_t
UpdateLatency
(
int64_t
latency
);
bool
UpdateErrorCost
(
int64_t
latency
,
int64_t
ema_latency
);
...
...
@@ -72,8 +82,9 @@ private:
EmaErrorRecorder
_long_window
;
EmaErrorRecorder
_short_window
;
int64_t
_last_reset_time_ms
;
butil
::
atomic
<
bool
>
_broken
;
butil
::
atomic
<
int
>
_isolation_duration_ms
;
butil
::
atomic
<
int
>
_isolated_times
;
butil
::
atomic
<
bool
>
_broken
;
};
}
// namespace brpc
...
...
src/brpc/controller.cpp
View file @
2685cd8c
...
...
@@ -696,11 +696,22 @@ inline bool does_error_affect_main_socket(int error_code) {
// entire RPC (specified by c->FailedInline()).
void
Controller
::
Call
::
OnComplete
(
Controller
*
c
,
int
error_code
/*note*/
,
bool
responded
,
bool
end_of_rpc
)
{
if
(
enable_circuit_breaker
&&
sending_sock
)
{
sending_sock
->
FeedbackCircuitBreaker
(
error_code
,
butil
::
gettimeofday_us
()
-
begin_time_us
);
if
(
stream_user_data
)
{
stream_user_data
->
DestroyStreamUserData
(
sending_sock
,
c
,
error_code
,
end_of_rpc
);
stream_user_data
=
NULL
;
}
if
(
sending_sock
!=
NULL
)
{
if
(
error_code
!=
0
)
{
sending_sock
->
AddRecentError
();
}
if
(
enable_circuit_breaker
)
{
sending_sock
->
FeedbackCircuitBreaker
(
error_code
,
butil
::
gettimeofday_us
()
-
begin_time_us
);
}
}
switch
(
c
->
connection_type
())
{
case
CONNECTION_TYPE_UNKNOWN
:
break
;
...
...
@@ -758,6 +769,7 @@ void Controller::Call::OnComplete(
}
break
;
}
if
(
ELOGOFF
==
error_code
)
{
SocketUniquePtr
sock
;
if
(
Socket
::
Address
(
peer_id
,
&
sock
)
==
0
)
{
...
...
@@ -765,18 +777,13 @@ void Controller::Call::OnComplete(
sock
->
SetLogOff
();
}
}
if
(
need_feedback
)
{
const
LoadBalancer
::
CallInfo
info
=
{
begin_time_us
,
peer_id
,
error_code
,
c
};
c
->
_lb
->
Feedback
(
info
);
}
if
(
stream_user_data
)
{
stream_user_data
->
DestroyStreamUserData
(
sending_sock
,
c
,
error_code
,
end_of_rpc
);
stream_user_data
=
NULL
;
}
// Release the `Socket' we used to send/receive data
sending_sock
.
reset
(
NULL
);
}
...
...
src/brpc/socket.cpp
View file @
2685cd8c
...
...
@@ -178,6 +178,8 @@ public:
CircuitBreaker
circuit_breaker
;
butil
::
atomic
<
uint64_t
>
recent_error_count
;
explicit
SharedPart
(
SocketId
creator_socket_id
);
~
SharedPart
();
...
...
@@ -193,7 +195,8 @@ Socket::SharedPart::SharedPart(SocketId creator_socket_id2)
,
in_num_messages
(
0
)
,
out_size
(
0
)
,
out_num_messages
(
0
)
,
extended_stat
(
NULL
)
{
,
extended_stat
(
NULL
)
,
recent_error_count
(
0
)
{
}
Socket
::
SharedPart
::~
SharedPart
()
{
...
...
@@ -766,6 +769,7 @@ void Socket::Revive() {
SharedPart
*
sp
=
GetSharedPart
();
if
(
sp
)
{
sp
->
circuit_breaker
.
Reset
();
sp
->
recent_error_count
.
store
(
0
,
butil
::
memory_order_relaxed
);
}
// Set this flag to true since we add additional ref again
_recycle_flag
.
store
(
false
,
butil
::
memory_order_relaxed
);
...
...
@@ -802,6 +806,29 @@ int Socket::ReleaseAdditionalReference() {
return
-
1
;
}
void
Socket
::
AddRecentError
()
{
SharedPart
*
sp
=
GetSharedPart
();
if
(
sp
)
{
sp
->
recent_error_count
.
fetch_add
(
1
,
butil
::
memory_order_relaxed
);
}
}
int64_t
Socket
::
recent_error_count
()
const
{
SharedPart
*
sp
=
GetSharedPart
();
if
(
sp
)
{
return
sp
->
recent_error_count
.
load
(
butil
::
memory_order_relaxed
);
}
return
0
;
}
int
Socket
::
isolated_times
()
const
{
SharedPart
*
sp
=
GetSharedPart
();
if
(
sp
)
{
return
sp
->
circuit_breaker
.
isolated_times
();
}
return
0
;
}
int
Socket
::
SetFailed
(
int
error_code
,
const
char
*
error_fmt
,
...)
{
if
(
error_code
==
0
)
{
CHECK
(
false
)
<<
"error_code is 0"
;
...
...
@@ -836,6 +863,7 @@ int Socket::SetFailed(int error_code, const char* error_fmt, ...) {
// by Channel to revive never-connected socket when server side
// comes online.
if
(
_health_check_interval_s
>
0
)
{
GetOrNewSharedPart
(
)
->
circuit_breaker
.
MarkAsBroken
();
PeriodicTaskManager
::
StartTaskAt
(
new
HealthCheckTask
(
id
()),
butil
::
milliseconds_from_now
(
GetOrNewSharedPart
()
->
...
...
@@ -876,8 +904,9 @@ int Socket::SetFailed() {
void
Socket
::
FeedbackCircuitBreaker
(
int
error_code
,
int64_t
latency_us
)
{
if
(
!
GetOrNewSharedPart
()
->
circuit_breaker
.
OnCallEnd
(
error_code
,
latency_us
))
{
LOG
(
ERROR
)
<<
"Socket["
<<
*
this
<<
"] isolated by circuit breaker"
;
SetFailed
(
main_socket_id
());
if
(
SetFailed
(
main_socket_id
())
==
0
)
{
LOG
(
ERROR
)
<<
"Socket["
<<
*
this
<<
"] isolated by circuit breaker"
;
}
}
}
...
...
src/brpc/socket.h
View file @
2685cd8c
...
...
@@ -317,6 +317,12 @@ public:
__attribute__
((
__format__
(
__printf__
,
3
,
4
)));
static
int
SetFailed
(
SocketId
id
);
void
AddRecentError
();
int64_t
recent_error_count
()
const
;
int
isolated_times
()
const
;
void
FeedbackCircuitBreaker
(
int
error_code
,
int64_t
latency_us
);
bool
Failed
()
const
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment