Skip to content

Commit

Permalink
fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
KuilongCui committed Sep 12, 2024
1 parent d631720 commit 1f7dea5
Show file tree
Hide file tree
Showing 10 changed files with 17 additions and 11,782 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ lint: check_pylint_installed check_pytest_installed

.PHONY: test
test: check_pytest_installed
- @pytest -x --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings
+ @pytest -x -s --ignore=third_party/ --ignore=tests/e2e_test --disable-warnings

#################### pygloo install for gloo migration backend begin ####################

Expand Down
Binary file removed benchmark_len.png
Binary file not shown.
2 changes: 1 addition & 1 deletion conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
def pytest_sessionstart(session):
subprocess.run(["ray", "stop", "--force"], check=True)
sleep(3)
- subprocess.run(["ray", "start", "--head"], check=True)
+ subprocess.run(["ray", "start", "--head", "--disable-usage-stats", "--port=30050"], check=True)
sleep(3)

def pytest_sessionfinish(session, exitstatus):
Expand Down
815 changes: 0 additions & 815 deletions instance_37037.out

This file was deleted.

10,947 changes: 0 additions & 10,947 deletions instance_37038.out

This file was deleted.

7 changes: 5 additions & 2 deletions llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,8 @@ def step(self) -> None:
instance_info.step_id = next(self.step_counter)
instance_info.timestamp = time.time()
instance_info.latency = self.model_executor.last_inference_latency

self.scheduler.scheduler_lock.acquire()
seq_groups = self.scheduler.running
if seq_groups:
tot_blocks = []
Expand All @@ -174,7 +176,8 @@ def step(self) -> None:
tot_blocks.extend(blocks)
tot_blocks = set(tot_blocks)
instance_info.num_blocks_last_running_request = len(tot_blocks)

self.scheduler.scheduler_lock.release()

if request_outputs:
self._put_request_outputs_to_server(request_outputs, server_infos)
self.instance_info = instance_info
Expand Down Expand Up @@ -257,8 +260,8 @@ def commit_dst_request(self, backend_request: SequenceGroupLlumnix) -> None:
logger.info("add seq {} to block table".format(seq.seq_id))
pre_alloc_blocks = self.engine.scheduler.pre_alloc_cache_dict.pop(backend_request.request_id)
self.engine.scheduler.block_manager.add_block_table(pre_alloc_blocks, seq.seq_id)
self.add_running_request(backend_request)
backend_request.reset_migration_args()
self.add_running_request(backend_request)

def send_blocks(self, dst_ray_actor: "ray.actor.ActorHandle", src_blocks: List[int], dst_blocks: List[int]) -> None:
ray.get(dst_ray_actor.execute_engine_method.remote("_run_workers",
Expand Down
2 changes: 1 addition & 1 deletion llumnix/backends/vllm/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def init_migration(self, instance_id: str, migration_config: MigrationConfig, sr
self.instance_id = instance_id
self.global_world_size = 0
self.global_rank = -1
- self.migration_config = migration_config
+ # self.migration_config = migration_config
self.migration_backend: MigrationBackendBase = get_migration_backend(migration_config=migration_config,
cache_engine=self.cache_engine,
worker_handle_list=src_worker_handle_list,
Expand Down
17 changes: 5 additions & 12 deletions tests/e2e_test/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
import json
import os
import subprocess
import unittest
import pytest
import ray
import torch
Expand All @@ -36,6 +35,7 @@ def generate_bench_command(ip_ports: str, model: str, num_prompts: int, dataset_
f"--backend vLLM "
f"--tokenizer {model} "
f"--trust_remote_code "
f"--log_filename bench_{ip_ports} "
f"--random_prompt_count {num_prompts} "
f"--dataset_type {dataset_type} "
f"--dataset_path {dataset_path} "
Expand Down Expand Up @@ -96,21 +96,14 @@ def parse_log_file():
@pytest.mark.asyncio
@pytest.mark.parametrize("model", ['/mnt/model/Qwen-7B'])
async def test_simple_benchmark(model):
# clear state
shutdown_llumnix_service()

os.environ['HEAD_NODE_IP'] = "127.0.0.1"
os.environ['HEAD_NODE'] = "1"
launch_ray_cluster(ray_cluster_port=30050)

device_count = torch.cuda.device_count()
base_port = 37037
for i in range(device_count):
launch_command = generate_launch_command(result_filename=str(base_port+i)+".out",
launch_ray_cluster=False, port=base_port+i, model=model)
subprocess.run(launch_command, shell=True, check=True)

await asyncio.sleep(90)
await asyncio.sleep(60)

async def run_bench_command(command):
process = await asyncio.create_subprocess_shell(command)
Expand All @@ -126,11 +119,11 @@ async def run_bench_command(command):
results_filename=f"{base_port+i}.out")
tasks.append(run_bench_command(bench_command))

await asyncio.wait(tasks, timeout=60*30)

parse_log_file()

shutdown_llumnix_service()
clear_ray_state()
assert 1==0
await asyncio.sleep(10)

if __name__ == '__main__':
unittest.main()
5 changes: 3 additions & 2 deletions tests/e2e_test/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ def generate_launch_command(result_filename: str = "", launch_ray_cluster: bool
f"nohup python -m llumnix.entrypoints.vllm.api_server "
f"--host {ip} "
f"--port {port} "
f"--disable-init-instance-by-manager "
f"--disable-fixed-node-init-instance "
f"--initial-instances {instances_num} "
f"--enable-migration "
f"--model {model} "
Expand All @@ -42,7 +44,7 @@ def generate_launch_command(result_filename: str = "", launch_ray_cluster: bool
f"--tensor-parallel-size 1 "
f"--request-output-queue-port {1234+port} "
f"{'--launch-ray-cluster ' if launch_ray_cluster else ''}"
f"{'> '+result_filename if len(result_filename)> 0 else ''} &"
f"{'> instance_'+result_filename if len(result_filename)> 0 else ''} &"
)
return command

Expand Down Expand Up @@ -107,7 +109,6 @@ async def test_e2e(model):
}

# generate llumnix outputs
shutdown_llumnix_service()
launch_llumnix_service(model, max_model_len=max_model_len)
await asyncio.sleep(60)

Expand Down
2 changes: 1 addition & 1 deletion tools/bench_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ set -ex

nvidia-docker run --rm -t --net host --ipc host -v ${PWD}:/workspace -v /mnt:/mnt -w /workspace \
registry.cn-beijing.aliyuncs.com/llumnix/llumnix-dev:20240909_action_678a439 \
- bash -c "pip install -e . > /dev/null && pytest ./tests/e2e_test/test_bench.py"
+ bash -c "pip install -e . > /dev/null && pytest -s ./tests/e2e_test/test_bench.py"

0 comments on commit 1f7dea5

Please sign in to comment.