diff --git a/Cargo.lock b/Cargo.lock index 4791a4fe..7f7d2844 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,19 +4,13 @@ version = 3 [[package]] name = "addr2line" -version = "0.22.0" +version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +checksum = "f5fb1d8e4442bd405fdfd1dacb42792696b0cf9cb15882e5d097b742a676d375" dependencies = [ "gimli", ] -[[package]] -name = "adler" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" - [[package]] name = "adler2" version = "2.0.0" @@ -86,6 +80,12 @@ dependencies = [ "syn 2.0.77", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.3.0" @@ -94,9 +94,9 @@ checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" [[package]] name = "axum" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" +checksum = "8f43644eed690f5374f1af436ecd6aea01cd201f6fbdf0178adaf6907afb2cec" dependencies = [ "async-trait", "axum-core", @@ -120,17 +120,16 @@ dependencies = [ "serde_urlencoded", "sync_wrapper 1.0.1", "tokio", - "tower", + "tower 0.5.1", "tower-layer", "tower-service", - "tracing", ] [[package]] name = "axum-core" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" +checksum = "5e6b8ba012a258d63c9adfa28b9ddcf66149da6f986c5b5452e629d5ee64bf00" dependencies = [ "async-trait", "bytes", @@ -141,25 +140,24 @@ dependencies = [ "mime", "pin-project-lite", "rustversion", - "sync_wrapper 0.1.2", + "sync_wrapper 1.0.1", "tower-layer", "tower-service", - "tracing", ] [[package]] name = "backtrace" -version = "0.3.73" +version = "0.3.74" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a" dependencies = [ "addr2line", - "cc", "cfg-if", "libc", - "miniz_oxide 0.7.4", + "miniz_oxide", "object", "rustc-demangle", + "windows-targets", ] [[package]] @@ -197,15 +195,15 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.1" +version = "1.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" +checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" [[package]] name = "cc" -version = "1.1.15" +version = "1.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "57b6a275aa2903740dc87da01c62040406b8812552e97129a63ea8850a17c6e6" +checksum = "07b1695e2c7e8fc85310cde85aeaab7e3097f593c91d209d3f9df76c928100f0" dependencies = [ "shlex", ] @@ -227,9 +225,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.13" +version = "0.2.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51e852e6dc9a5bed1fae92dd2375037bf2b768725bf3be87811edee3249d09ad" +checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" dependencies = [ "libc", ] @@ -361,7 +359,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "324a1be68054ef05ad64b861cc9eaf1d623d2d8cb25b4bf2cb9cdd902b4bf253" dependencies = [ "crc32fast", - "miniz_oxide 0.8.0", + "miniz_oxide", ] [[package]] @@ -452,9 +450,28 @@ dependencies = [ [[package]] name = "gimli" -version = "0.29.0" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32085ea23f3234fc7846555e85283ba4de91e21016dc0455a16286d87a292d64" + +[[package]] +name = "h2" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" +checksum = "524e8ac6999421f49a846c2d4411f337e53497d8ec55d67753beffa43c5d9205" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] [[package]] name = "hashbrown" @@ -523,6 +540,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2", "http", "http-body", "httparse", @@ -536,9 +554,9 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.2" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ee4be2c948921a1a5320b629c4193916ed787a7f7f293fd3f7f5a6c9de74155" +checksum = "08afdbb5c31130e3034af566421053ab03787c640246a446327f550d11bcb333" dependencies = [ "futures-util", "http", @@ -554,9 +572,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.7" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" +checksum = "da62f120a8a37763efb0cf8fdf264b884c7b8b9ac8660b900c8661030c00e6ba" dependencies = [ "bytes", "futures-channel", @@ -567,7 +585,7 @@ dependencies = [ "pin-project-lite", "socket2", "tokio", - "tower", + "tower 0.4.13", "tower-service", "tracing", ] @@ -595,9 +613,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.9.0" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +checksum = "187674a687eed5fe42285b40c6291f9a01517d415fad1c3cbc6a9f778af7fcd4" [[package]] name = "itoa" @@ -670,15 +688,6 @@ dependencies = [ "unicase", ] -[[package]] -name = "miniz_oxide" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" -dependencies = [ - "adler", -] - [[package]] name = "miniz_oxide" version = "0.8.0" @@ -702,14 +711,13 @@ dependencies = [ [[package]] name = "mosec" -version = "0.8.7" +version = "0.8.8" dependencies = [ "async-channel", "async-stream", "axum", "bytes", "derive_more", - "hyper", "prometheus-client", "serde", "serde_json", @@ -789,9 +797,9 @@ checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" [[package]] name = "parking" -version = "2.2.0" +version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" +checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" [[package]] name = "parking_lot" @@ -1023,9 +1031,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" dependencies = [ "bitflags", ] @@ -1165,9 +1173,9 @@ checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustls" -version = "0.23.12" +version = "0.23.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c58f8c84392efc0a126acce10fa59ff7b3d2ac06ab451a33f2741989b806b044" +checksum = "f2dabaac7466917e566adb06783a81ca48944c6898a1b08b9374106dd671f4c8" dependencies = [ "once_cell", "ring", @@ -1195,9 +1203,9 @@ checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" -version = "0.102.7" +version = "0.102.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84678086bd54edf2b415183ed7a94d0efb049f1b646a33e22a36f3794be6ae56" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" dependencies = [ "ring", "rustls-pki-types", @@ -1233,18 +1241,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", @@ -1253,9 +1261,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "itoa", "memchr", @@ -1510,6 +1518,19 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-util" +version = "0.7.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61e7c3654c13bcd040d4a03abee2c75b1d14a37b423cf5a813ceae1cc903ec6a" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + [[package]] name = "toml_datetime" version = "0.6.8" @@ -1518,9 +1539,9 @@ checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" [[package]] name = "toml_edit" -version = "0.22.20" +version = "0.22.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" +checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" dependencies = [ "indexmap", "toml_datetime", @@ -1540,7 +1561,21 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", +] + +[[package]] +name = "tower" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2873938d487c3cfb9aed7546dc9f2711d867c9f90c46b889989a2cb84eba6b4f" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper 0.1.2", + "tokio", + "tower-layer", + "tower-service", ] [[package]] @@ -1561,7 +1596,6 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", "pin-project-lite", "tracing-attributes", "tracing-core", @@ -1656,24 +1690,24 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" -version = "0.1.23" +version = "0.1.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" +checksum = "5033c97c4262335cded6d6fc3e5c18ab755e1a3dc96376350f3d8e9f009ad956" dependencies = [ "tinyvec", ] [[package]] name = "unicode-xid" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229730647fbc343e3a80e463c1db7f78f3855d3f3739bee0dda773c9a037c90a" +checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" [[package]] name = "untrusted" @@ -1850,9 +1884,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.5" +version = "0.26.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0bd24728e5af82c6c4ec1b66ac4844bdf8156257fccda846ec58b42cd0cdbe6a" +checksum = "841c67bff177718f1d4dfefde8d8f0e78f9b6589319ba88312f567fc5841a958" dependencies = [ "rustls-pki-types", ] diff --git a/Cargo.toml b/Cargo.toml index 58d0dd52..2e9414c4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mosec" -version = "0.8.7" +version = "0.8.8" authors = ["Keming ", "Zichen "] edition = "2021" license = "Apache-2.0" @@ -10,9 +10,7 @@ description = "Model Serving made Efficient in the Cloud." documentation = "https://docs.rs/mosec" exclude = ["target", "examples", "tests", "scripts"] -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -hyper = { version = "1", features = ["http1", "server"] } bytes = "1" tracing = "0.1" tracing-subscriber = { version = "0.3", features = ["local-time", "json"] } @@ -21,7 +19,7 @@ derive_more = { version = "1", features = ["display", "error", "from"] } # MPMS that only one consumer sees each message & async async-channel = "2.2" prometheus-client = "0.22" -axum = "0.7" +axum = { version = "0.7", default-features = false, features = ["matched-path", "original-uri", "query", "tokio", "http1", "http2"]} async-stream = "0.3.5" serde = "1.0" serde_json = "1.0" diff --git a/README.md b/README.md index 157f2d7c..eaf275c2 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,7 @@ from mosec.mixin import MsgpackMixin logger = get_logger() ``` -Then, we **build an API** for clients to query a text prompt and obtain an image based on the [stable-diffusion-v1-5 model](https://huggingface.co/runwayml/stable-diffusion-v1-5) in just 3 steps. +Then, we **build an API** for clients to query a text prompt and obtain an image based on the [stable-diffusion-v1-5 model](https://huggingface.co/stable-diffusion-v1-5/stable-diffusion-v1-5) in just 3 steps. 1) Define your service as a class which inherits `mosec.Worker`. Here we also inherit `MsgpackMixin` to employ the [msgpack](https://msgpack.org/index.html) serialization format(a). @@ -104,10 +104,9 @@ Then, we **build an API** for clients to query a text prompt and obtain an image class StableDiffusion(MsgpackMixin, Worker): def __init__(self): self.pipe = StableDiffusionPipeline.from_pretrained( - "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 + "sd-legacy/stable-diffusion-v1-5", torch_dtype=torch.float16 ) - device = "cuda" if torch.cuda.is_available() else "cpu" - self.pipe = self.pipe.to(device) + self.pipe.enable_model_cpu_offload() self.example = ["useless example prompt"] * 4 # warmup (batch_size=4) def forward(self, data: List[str]) -> List[memoryview]: @@ -229,6 +228,7 @@ More ready-to-use examples can be found in the [Example](https://mosecorg.github - For multi-stage services, note that the data passing through different stages will be serialized/deserialized by the `serialize_ipc/deserialize_ipc` methods, so extremely large data might make the whole pipeline slow. The serialized data is passed to the next stage through rust by default, you could enable shared memory to potentially reduce the latency (ref [RedisShmIPCMixin](https://mosecorg.github.io/mosec/examples/ipc.html#redis-shm-ipc-py)). - You should choose appropriate `serialize/deserialize` methods, which are used to decode the user request and encode the response. By default, both are using JSON. However, images and embeddings are not well supported by JSON. You can choose msgpack which is faster and binary compatible (ref [Stable Diffusion](https://mosecorg.github.io/mosec/examples/stable_diffusion.html)). - Configure the threads for OpenBLAS or MKL. It might not be able to choose the most suitable CPUs used by the current Python process. You can configure it for each worker by using the [env](https://mosecorg.github.io/mosec/reference/interface.html#mosec.server.Server.append_worker) (ref [custom GPU allocation](https://mosecorg.github.io/mosec/examples/env.html)). +- Enable HTTP/2 from client side. `mosec` automatically adapts to user's protocol (e.g., HTTP/2) since v0.8.8. ## Adopters diff --git a/examples/stable_diffusion/server.py b/examples/stable_diffusion/server.py index af796960..14775629 100644 --- a/examples/stable_diffusion/server.py +++ b/examples/stable_diffusion/server.py @@ -27,10 +27,10 @@ class StableDiffusion(MsgpackMixin, Worker): def __init__(self): self.pipe = StableDiffusionPipeline.from_pretrained( - "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16 + "sd-legacy/stable-diffusion-v1-5", + torch_dtype=torch.float16, ) - device = "cuda" if torch.cuda.is_available() else "cpu" - self.pipe = self.pipe.to(device) # type: ignore + self.pipe.enable_model_cpu_offload() self.example = ["useless example prompt"] * 4 # warmup (bs=4) def forward(self, data: List[str]) -> List[memoryview]: diff --git a/requirements/dev.txt b/requirements/dev.txt index 0fc63aaf..b84d5aa9 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -5,5 +5,5 @@ mypy~=1.11 pyright~=1.1 ruff~=0.6 pre-commit>=2.15.0 -httpx==0.27.2 +httpx[http2]==0.27.2 httpx-sse==0.4.0 diff --git a/src/main.rs b/src/main.rs index 7499bf3d..4221155f 100644 --- a/src/main.rs +++ b/src/main.rs @@ -123,7 +123,7 @@ fn main() { .with( output .with_filter(filter::filter_fn(|metadata| { - !metadata.target().starts_with("hyper") + !metadata.target().starts_with("h2") })) .with_filter(filter::LevelFilter::DEBUG), ) diff --git a/src/routes.rs b/src/routes.rs index a62c4b51..f4ef6ba9 100644 --- a/src/routes.rs +++ b/src/routes.rs @@ -15,12 +15,11 @@ use std::time::Duration; use axum::body::{to_bytes, Body}; -use axum::http::Uri; +use axum::http::header::{HeaderValue, CONTENT_TYPE}; +use axum::http::{Request, Response, StatusCode, Uri}; use axum::response::sse::{Event, KeepAlive, Sse}; use axum::response::IntoResponse; use bytes::Bytes; -use hyper::header::{HeaderValue, CONTENT_TYPE}; -use hyper::{Request, Response, StatusCode}; use prometheus_client::encoding::text::encode; use tracing::warn; use utoipa::OpenApi; diff --git a/src/tasks.rs b/src/tasks.rs index 126d0120..2d45a009 100644 --- a/src/tasks.rs +++ b/src/tasks.rs @@ -18,8 +18,8 @@ use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Mutex, OnceLock}; use std::time::{Duration, Instant}; +use axum::http::StatusCode; use bytes::Bytes; -use hyper::StatusCode; use tokio::sync::{mpsc, oneshot, Barrier}; use tokio::time; use tracing::{debug, error, info, warn}; diff --git a/tests/test_service.py b/tests/test_service.py index 6cb5ada3..9368f38f 100644 --- a/tests/test_service.py +++ b/tests/test_service.py @@ -40,6 +40,15 @@ def http_client(): yield client +@pytest.fixture +def http2_client(): + # force to use HTTP/2 + with httpx.Client( + base_url=f"http://127.0.0.1:{TEST_PORT}", http1=False, http2=True + ) as client: + yield client + + @pytest.fixture(scope="session") def mosec_service(request): params = request.param.split(" ") @@ -54,6 +63,19 @@ def mosec_service(request): assert wait_for_port_free(port=TEST_PORT), "service failed to stop" +@pytest.mark.parametrize( + "mosec_service, http2_client", + [ + pytest.param("square_service", "", id="HTTP/2"), + ], + indirect=["mosec_service", "http2_client"], +) +def test_http2_service(mosec_service, http2_client): + resp = http2_client.get("/") + assert resp.status_code == HTTPStatus.OK + assert resp.http_version == "HTTP/2" + + @pytest.mark.parametrize( "mosec_service, http_client", [