diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1bda2f1..a1f7599 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,11 +4,18 @@ All notable changes to this project will be documented in this file.
 
 ## [Unreleased]
 
+## [0.3.2] - 2024-08-20
+
 ### Changed
 
 - Don't use the `aws-lc-rs` crate (introduced in [#45]), as it broke the Tilt build ([#46]).
 
+### Fixed
+
+- Fix division by zero when all clusters of a cluster group are not ready to accept queries ([#47]).
+
 [#46]: https://github.com/stackabletech/trino-lb/pull/46
+[#47]: https://github.com/stackabletech/trino-lb/pull/47
 
 ## [0.3.1] - 2024-08-16
 
diff --git a/README.md b/README.md
index b3c73f3..82d01f7 100644
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Make sure you clone this repo and run the following command from the root direct
 In case you don't have any Trino cluster at hand you can start trino-lb without any Trino cluster as follows:
 
 ```bash
-docker run -p 8080:8080 -v ./example-configs/simple-no-trino.yaml:/etc/trino-lb-config.yaml --rm oci.stackable.tech/stackable/trino-lb:0.1.0
+docker run -p 8080:8080 -v ./example-configs/simple-no-trino.yaml:/etc/trino-lb-config.yaml --rm oci.stackable.tech/stackable/trino-lb:0.3.1
 ```
 
 This starts trino-lb listening on http://127.0.0.1:8080.
@@ -44,12 +44,12 @@ As you can see by `QUEUED_IN_TRINO_LB`, the query is queued in trino-lb indefini
 
 Let's assume your Trino cluster is accessible at `https://127.0.0.1:8443` using the username `admin` and password `admin`.
 
-First check the config file `example-configs/simple-single-trino.yaml` and swap out the hostname and credentials corresponding to your Trino cluster.
+First check the config file `example-configs/docker-simple-single-trino.yaml` and swap out the hostname and credentials corresponding to your Trino cluster.
 Afterwards start trino-lb using the following command.
 We are using self-signed certificates for testing purpose here.
 
 ```bash
-docker run -p 443:8443 -v ./example-configs/simple-single-trino.yaml:/etc/trino-lb-config.yaml -v ./example-configs/self-signed-certs/:/self-signed-certs/ --rm oci.stackable.tech/stackable/trino-lb:0.1.0
+docker run -p 443:8443 -v ./example-configs/docker-simple-single-trino.yaml:/etc/trino-lb-config.yaml -v ./example-configs/self-signed-certs/:/self-signed-certs/ --rm oci.stackable.tech/stackable/trino-lb:0.3.1
 ```
 
 > [!NOTE]
@@ -58,7 +58,7 @@ docker run -p 443:8443 -v ./example-configs/simple-single-trino.yaml:/etc/trino-
 We can send queries to trino-lb and it will forward them to the configured Trino cluster.
 
 ```bash
-java -jar ~/Downloads/trino-cli-435-executable.jar --server https://127.0.0.1:443 --insecure --user admin --password
+java -jar ~/Downloads/trino-cli-*-executable.jar --server https://127.0.0.1:443 --insecure --user admin --password
 Password:
 trino> select 42;
  _col0
diff --git a/example-configs/docker-simple-single-trino.yaml b/example-configs/docker-simple-single-trino.yaml
new file mode 100644
index 0000000..6dbb782
--- /dev/null
+++ b/example-configs/docker-simple-single-trino.yaml
@@ -0,0 +1,27 @@
+trinoLb:
+  externalAddress: https://127.0.0.1:443 # exposed docker port is 443
+  # When you enable authentication, Trino clients enforce HTTPS encryption
+  tls:
+    enabled: true
+    certPemFile: /self-signed-certs/cert.pem
+    keyPemFile: /self-signed-certs/key.pem
+  # Use in-memory persistence, which will lose all queued and running queries on restart
+  persistence:
+    inMemory: {}
+trinoClusterGroups:
+  default:
+    # Once a cluster has more running queries than this, no further queries will be sent to it.
+    # They will be queued in trino-lb instead.
+    maxRunningQueries: 1
+    trinoClusters:
+      - name: trino-default-1
+        endpoint: https://5.250.181.98:8443 # FIXME
+        credentials:
+          username: admin
+          password: adminadmin # FIXME
+# Your Trino probably does not have a globally trusted certificate
+trinoClusterGroupsIgnoreCert: true
+
+# Route all queries to the "default" cluster group
+routers: []
+routingFallback: default
diff --git a/trino-lb/src/scaling/mod.rs b/trino-lb/src/scaling/mod.rs
index a1982b8..89b587e 100644
--- a/trino-lb/src/scaling/mod.rs
+++ b/trino-lb/src/scaling/mod.rs
@@ -317,7 +317,7 @@ impl Scaler {
                     target_states.insert(to_start.name.to_owned(), ClusterState::Starting);
                 }
             }
-        } else {
+        } else if queued == 0 {
             // Determine excess clusters, this only makes sense when we don't upscale
             let cluster_query_counters = try_join_all(
                 clusters
@@ -335,7 +335,18 @@ impl Scaler {
                 .map(|(c, _)| c.max_running_queries)
                 .sum();
             let current_running_queries: u64 = cluster_query_counters.iter().sum();
-            let utilization_percent = 100 * current_running_queries / max_running_queries;
+
+            let utilization_percent = match (max_running_queries, current_running_queries) {
+                // No cluster is ready to accept queries and no queries are running
+                (0, 0) => 0,
+                // No cluster is ready to accept queries, but some queries are still running.
+                // This means the clusters are even more utilized than they normally should be
+                // (although they can e.g. still have some queries running during draining).
+                // So we set the utilization to 100%
+                (0, _) => 100,
+                // We can calculate the percentage normally; it's safe to divide by max_running_queries
+                (_, _) => 100 * current_running_queries / max_running_queries,
+            };
 
             debug!(
                 current_running_queries,
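
Review note: below is a minimal standalone sketch of the utilization calculation this patch introduces. The free function `utilization_percent` and the values in `main` are illustrative only; in the patch itself the `match` lives inline in `Scaler` in `trino-lb/src/scaling/mod.rs`.

```rust
/// Sketch of the division-by-zero fix above, extracted into a free function
/// for illustration (hypothetical helper, not part of the codebase).
fn utilization_percent(max_running_queries: u64, current_running_queries: u64) -> u64 {
    match (max_running_queries, current_running_queries) {
        // No cluster is ready to accept queries and none are running
        (0, 0) => 0,
        // No cluster is ready, but queries are still running (e.g. while
        // draining): treat the group as fully utilized
        (0, _) => 100,
        // max_running_queries is non-zero here, so the division is safe
        (_, _) => 100 * current_running_queries / max_running_queries,
    }
}

fn main() {
    // Before the patch, `100 * 0 / 0` panicked with "attempt to divide by zero"
    assert_eq!(utilization_percent(0, 0), 0);
    assert_eq!(utilization_percent(0, 3), 100);
    assert_eq!(utilization_percent(10, 5), 50);
}
```

Matching on the tuple makes the zero-capacity cases explicit, so the integer division in the last arm can no longer panic when every cluster in a group is unready.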