Skip to content

Commit

Permalink
Merge #589: Fix broken grateful shutdown for tracker API
Browse files Browse the repository at this point in the history
452b4a0 feat: improve http_health_check output (Jose Celano)
ac18605 feat: improve log when starting the API (Jose Celano)
53613ec feat: start log lines with capital (Jose Celano)
cf613b8 fix: [#588] broken grateful shutdown for tracker API (Jose Celano)

Pull request description:

  The internal halt channel was not working because the sender was being dropped just after starting the server.

  That also made the `shutdown_signal` fail.

  ```rust
  pub async fn shutdown_signal(rx_halt: tokio::sync::oneshot::Receiver<Halted>) {
      let halt = async {
          match rx_halt.await {
              Ok(signal) => signal,
              Err(err) => panic!("Failed to install stop signal: {err}"),
          }
      };

      tokio::select! {
          signal = halt => { info!("Halt signal processed: {}", signal) },
          () = global_shutdown_signal() => { info!("Global shutdown signal processed") }
      }
  }
  ```

  Since the signal branch in the `tokio::select!` was finishing the global_shutdown_signal did not work either. So you had to kill the process manually to stop the tracker.

  It seems Rust partially dropped the `Running::halt_taks` attribute and that closed the channel.

  The issue was introduced in this commit: 13140f6

ACKs for top commit:
  josecelano:
    ACK 452b4a0

Tree-SHA512: 96afc18a62c55b1bd5a97e97f43bd0c2603de00944211c46197944b2e24008f54e4d7c96b43b237e029c61e67f060716315f85433abcb3234ba880d1e20032e3
  • Loading branch information
josecelano committed Jan 9, 2024
2 parents 5f6eed7 + 452b4a0 commit a876d8c
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 18 deletions.
2 changes: 1 addition & 1 deletion src/bin/http_health_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ async fn main() {
let args: Vec<String> = env::args().collect();
if args.len() != 2 {
eprintln!("Usage: cargo run --bin http_health_check <HEALTH_URL>");
eprintln!("Example: cargo run --bin http_health_check http://127.0.0.1:1212/api/health_check");
eprintln!("Example: cargo run --bin http_health_check http://127.0.0.1:1313/health_check");
std::process::exit(1);
}

Expand Down
2 changes: 1 addition & 1 deletion src/bootstrap/jobs/health_check_api.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ pub async fn start_job(config: Arc<Configuration>) -> JoinHandle<()> {

// Wait until the API server job is running
match rx_start.await {
Ok(msg) => info!("Torrust Health Check API server started on socket: {}", msg.address),
Ok(msg) => info!("Torrust Health Check API server started on: http://{}", msg.address),
Err(e) => panic!("the Health Check API server was dropped: {e}"),
}

Expand Down
2 changes: 1 addition & 1 deletion src/bootstrap/jobs/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pub struct Started {

pub async fn make_rust_tls(enabled: bool, cert: &Option<String>, key: &Option<String>) -> Option<Result<RustlsConfig, Error>> {
if !enabled {
info!("tls not enabled");
info!("TLS not enabled");
return None;
}

Expand Down
1 change: 1 addition & 0 deletions src/bootstrap/jobs/tracker_apis.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ async fn start_v1(socket: SocketAddr, tls: Option<RustlsConfig>, tracker: Arc<co
.expect("it should be able to start to the tracker api");

tokio::spawn(async move {
assert!(!server.state.halt_task.is_closed(), "Halt channel should be open");
server.state.task.await.expect("failed to close service");
})
}
Expand Down
33 changes: 23 additions & 10 deletions src/servers/apis/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use axum_server::tls_rustls::RustlsConfig;
use axum_server::Handle;
use derive_more::Constructor;
use futures::future::BoxFuture;
use log::{error, info};
use tokio::sync::oneshot::{Receiver, Sender};

use super::routes::router;
Expand Down Expand Up @@ -101,13 +102,22 @@ impl ApiServer<Stopped> {
launcher
});

Ok(ApiServer {
state: Running {
binding: rx_start.await.expect("unable to start service").address,
halt_task: tx_halt,
task,
let api_server = match rx_start.await {
Ok(started) => ApiServer {
state: Running {
binding: started.address,
halt_task: tx_halt,
task,
},
},
})
Err(err) => {
let msg = format!("Unable to start API server: {err}");
error!("{}", msg);
panic!("{}", msg);
}
};

Ok(api_server)
}
}

Expand Down Expand Up @@ -159,29 +169,32 @@ impl Launcher {
tokio::task::spawn(graceful_shutdown(
handle.clone(),
rx_halt,
format!("shutting down http server on socket address: {address}"),
format!("Shutting down tracker API server on socket address: {address}"),
));

let tls = self.tls.clone();
let protocol = if tls.is_some() { "https" } else { "http" };

let running = Box::pin(async {
match tls {
Some(tls) => axum_server::from_tcp_rustls(socket, tls)
.handle(handle)
.serve(router.into_make_service_with_connect_info::<std::net::SocketAddr>())
.await
.expect("Axum server crashed."),
.expect("Axum server for tracker API crashed."),
None => axum_server::from_tcp(socket)
.handle(handle)
.serve(router.into_make_service_with_connect_info::<std::net::SocketAddr>())
.await
.expect("Axum server crashed."),
.expect("Axum server for tracker API crashed."),
}
});

info!(target: "API", "API server started on {protocol}://{}", address);

tx_start
.send(Started { address })
.expect("the HTTP(s) Tracker service should not be dropped");
.expect("the HTTP(s) Tracker API service should not be dropped");

running
}
Expand Down
2 changes: 1 addition & 1 deletion src/servers/http/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ impl Launcher {
tokio::task::spawn(graceful_shutdown(
handle.clone(),
rx_halt,
format!("shutting down http server on socket address: {address}"),
format!("Shutting down http server on socket address: {address}"),
));

let tls = self.tls.clone();
Expand Down
13 changes: 9 additions & 4 deletions src/servers/signals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,16 @@ pub async fn global_shutdown_signal() {
///
/// Will panic if the `stop_receiver` resolves with an error.
pub async fn shutdown_signal(rx_halt: tokio::sync::oneshot::Receiver<Halted>) {
let halt = async { rx_halt.await.expect("Failed to install stop signal.") };
let halt = async {
match rx_halt.await {
Ok(signal) => signal,
Err(err) => panic!("Failed to install stop signal: {err}"),
}
};

tokio::select! {
_ = halt => {},
() = global_shutdown_signal() => {}
signal = halt => { info!("Halt signal processed: {}", signal) },
() = global_shutdown_signal() => { info!("Global shutdown signal processed") }
}
}

Expand All @@ -64,7 +69,7 @@ pub async fn shutdown_signal_with_message(rx_halt: tokio::sync::oneshot::Receive
pub async fn graceful_shutdown(handle: axum_server::Handle, rx_halt: tokio::sync::oneshot::Receiver<Halted>, message: String) {
shutdown_signal_with_message(rx_halt, message).await;

info!("sending graceful shutdown signal");
info!("Sending graceful shutdown signal");
handle.graceful_shutdown(Some(Duration::from_secs(90)));

println!("!! shuting down in 90 seconds !!");
Expand Down

0 comments on commit a876d8c

Please sign in to comment.