2 * API Reference: https://ethereum.github.io/beacon-APIs/
5 * - Check that some processes (nethermind, lighthouse) do not exceed a percentage
6 * of total memory. For example 40 %.
14 time
::{self, Duration
},
19 message
::header
::ContentType
, transport
::smtp
::authentication
::Credentials
, Message
,
20 SmtpTransport
, Transport
,
22 use serde
::Deserialize
;
24 use crate::config
::Config
;
/// Path of the configuration file handed to `Config::read` at startup.
const FILE_CONF: &str = "config.ron";

/// Target duration of one iteration of the main check loop; the loop sleeps for
/// whatever remains of this period once the checks are done.
const CHECK_PERIOD: Duration = Duration::from_secs(10); // 10 s.

/// Read timeout set on the UDP socket of the check-alive thread.
const PING_TIMEOUT: Duration = Duration::from_secs(10); // 10 s.

/// Send an email after 2 successive errors.
const NUMBER_SUCCESSIVE_ERRORS_SEND_EMAIL: i32 = 2;

/// Send an emergency email after 10 successive errors.
///
/// NOTE(review): "EMEGENCY" is a misspelling of "EMERGENCY". The name is kept as-is
/// because the main loop refers to this exact identifier; rename both together.
const NUMBER_SUCCESSIVE_ERRORS_SEND_EMEGENCY_EMAIL: i32 = 10;

/// Minimum delay between two regular error emails.
const EMAIL_RESEND_PERIOD: Duration = Duration::from_secs(2 * 60 * 60); // 2 h.

/// Interval between two "No error detected" status lines while no error is detected.
const STATE_PRINT_PERIOD: Duration = Duration::from_secs(15 * 60); // 15 min.

/// Base URI of the HTTP API on the local node (port 5052, `eth/v1` prefix).
/// See the API reference linked in the file header.
const BASE_URI: &str = "http://localhost:5052/eth/v1/";
41 fn main() -> Result
<()> {
42 println!("Staking Watchdog");
44 let config
= Config
::read(FILE_CONF
)?
;
47 "Configuration: {:?}",
49 smtp_password
: "*****".to_string(),
54 let check_alive_error_mutex
= start_check_alive_thread();
56 let mut time_last_email_send
= time
::Instant
::now() - EMAIL_RESEND_PERIOD
;
57 let mut time_last_state_printed
= time
::Instant
::now() - STATE_PRINT_PERIOD
;
58 let mut error_count
= 0; // Number of successive errors.
61 let time_beginning_loop
= time
::Instant
::now();
63 if let Err(error
) = check_validators(&config
.pub_keys
)
65 .and(check_alive_error_mutex
.lock().unwrap().as_ref())
68 println!("Error {:?} (error_count={}): {}", error
, error_count
, error
);
69 if error_count
>= NUMBER_SUCCESSIVE_ERRORS_SEND_EMAIL
70 && time
::Instant
::now() - time_last_email_send
>= EMAIL_RESEND_PERIOD
73 println!("Sending email...");
75 "Watchdog: Staking error",
76 &format!("Error: {}", error
),
77 &config
.smtp_relay_address
,
79 &config
.smtp_password
,
81 Err(email_error
) => println!("Error sending email: {:?}", email_error
),
83 println!("Email successfully sent");
84 time_last_email_send
= time
::Instant
::now();
87 } else if error_count
== NUMBER_SUCCESSIVE_ERRORS_SEND_EMEGENCY_EMAIL
{
89 println!("Sending EMERGENCY email...");
91 "[EMERGENCY] Watchdog: Staking error",
93 "[EMERGENCY] Error: {}\n\nNumber of error: {}",
96 &config
.smtp_relay_address
,
98 &config
.smtp_password
,
100 Err(email_error
) => println!("Error sending email: {:?}", email_error
),
102 println!("Email successfully sent");
103 time_last_email_send
= time
::Instant
::now();
110 println!("End of erroneous state");
113 if time
::Instant
::now() - time_last_state_printed
>= STATE_PRINT_PERIOD
{
114 println!("No error detected");
115 time_last_state_printed
= time
::Instant
::now();
119 let elapsed
= time
::Instant
::now() - time_beginning_loop
;
121 if elapsed
< CHECK_PERIOD
{
122 let to_wait
= CHECK_PERIOD
- elapsed
;
123 thread
::sleep(to_wait
);
134 UnknownCodeFromHealthCheck(u16),
135 ReqwestError(reqwest
::Error
),
136 ValidatorError
{ pub_key
: String
, message
: String
},
137 ValidatorStatusError
{ pub_key
: String
, message
: String
},
141 impl fmt
::Display
for CheckError
{
142 fn fmt(&self, f
: &mut fmt
::Formatter
) -> fmt
::Result
{
144 CheckError
::HttpError(text
) => {
147 "Beacon node health check can't be reached (HTTP error): {}",
151 CheckError
::NotSync
=> {
154 "Beacon node health check is syncing (currently not sync)"
157 CheckError
::InvalidSyncStatus
=> {
158 write!(f
, "Beacon node health check returns an invalid status code")
160 CheckError
::NodeHavingIssues
=> {
163 "Beacon node health check is not initilized or having issue"
166 CheckError
::UnknownCodeFromHealthCheck(code
) => {
169 "Beacon node health check returns an unknown code: {}",
173 CheckError
::ReqwestError(error
) => {
174 write!(f
, "Error from reqwest: {}", error
)
176 CheckError
::ValidatorError
{ pub_key
, message
} => {
177 write!(f
, "Validator '{}' returns an error: {}", pub_key
, message
)
179 CheckError
::ValidatorStatusError
{ pub_key
, message
} => {
182 "Validator '{}' returns a status error: {}",
186 CheckError
::CheckAlive
=> {
187 write!(f
, "Check alive ping hasn't been received")
193 impl From
<reqwest
::Error
> for CheckError
{
194 fn from(value
: reqwest
::Error
) -> Self {
195 CheckError
::ReqwestError(value
)
199 #[derive(Deserialize, Debug)]
200 struct JsonValidatorState
{
201 data
: JsonValidatorStateData
,
204 #[derive(Deserialize, Debug)]
205 struct JsonValidatorStateData
{
209 #[derive(Deserialize, Debug)]
215 fn check_validators(pub_keys
: &[String
]) -> std
::result
::Result
<(), CheckError
> {
217 let client
= reqwest
::blocking
::Client
::new();
219 let request_health
= client
220 .get(format!("{url}node/health"))
221 .header("accept", "application/json");
222 match request_health
.send() {
224 // println!("{resp:?}"); // For debug.
225 match resp
.status().as_u16() {
227 206 => return Err(CheckError
::NotSync
),
228 400 => return Err(CheckError
::InvalidSyncStatus
),
229 503 => return Err(CheckError
::NodeHavingIssues
),
230 code
=> return Err(CheckError
::UnknownCodeFromHealthCheck(code
)),
234 println!("{error:?}");
235 return Err(CheckError
::HttpError(error
.to_string()));
239 for pub_key
in pub_keys
{
241 .get(format!("{url}beacon/states/head/validators/0x{pub_key}"))
242 .header("accept", "application/json");
243 match request
.send() {
245 // println!("{resp:?}"); // For debug.
246 match resp
.status().as_u16() {
248 let json
: JsonValidatorState
= resp
.json()?
;
249 if json
.data
.status
!= "active_ongoing" {
250 return Err(CheckError
::ValidatorStatusError
{
251 pub_key
: pub_key
.clone(),
252 message
: format!("Status: {}", json
.data
.status
),
257 let json
: JsonError
= resp
.json()?
;
258 return Err(CheckError
::ValidatorError
{
259 pub_key
: pub_key
.clone(),
261 "Http error code: {}, message: {}",
269 return Err(CheckError
::ValidatorError
{
270 pub_key
: pub_key
.clone(),
271 message
: error
.to_string(),
283 smtp_relay_address
: &str,
287 let email
= Message
::builder()
289 .from("Staking Watchdog <redmine@d-lan.net>".parse()?
)
290 .to("Greg Burri <greg.burri@gmail.com>".parse()?
)
292 .header(ContentType
::TEXT_PLAIN
)
293 .body(body
.to_string())?
;
295 let creds
= Credentials
::new(login
.to_string(), pass
.to_string());
297 let mailer
= SmtpTransport
::relay(smtp_relay_address
)?
301 let response
= mailer
.send(&email
)?
;
303 println!("{:?}", response
);
308 fn start_check_alive_thread() -> Arc
<Mutex
<std
::result
::Result
<(), CheckError
>>> {
309 let check_alive_error_mutex
: Arc
<Mutex
<std
::result
::Result
<(), CheckError
>>> =
310 Arc
::new(Mutex
::new(Ok(())));
311 let check_alive_error_mutex_copy
= check_alive_error_mutex
.clone();
313 let _thread_check_alive_handle
= thread
::spawn(move || {
314 let socket
= UdpSocket
::bind("0.0.0.0:8739").unwrap();
315 socket
.set_read_timeout(Some(PING_TIMEOUT
)).unwrap();
317 let mut buffer
= [0u8; 8];
320 match socket
.recv_from(&mut buffer
) {
322 let mut check_alive_error
= check_alive_error_mutex
.lock().unwrap();
324 *check_alive_error
= Ok(());
325 socket
.send_to(&buffer
, &src
).unwrap();
327 *check_alive_error
= Err(CheckError
::CheckAlive
);
331 let mut check_alive_error
= check_alive_error_mutex
.lock().unwrap();
332 *check_alive_error
= Err(CheckError
::CheckAlive
);
338 check_alive_error_mutex_copy