Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a launcher option to the tester #825

Merged
merged 14 commits into from
Oct 18, 2023
3 changes: 3 additions & 0 deletions legate/tester/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@
"LEGATE_TEST": "1",
}

#: Default number of nodes to use (the fallback value of the tester's
#: ``--nodes`` command line option when it is not given explicitly)
DEFAULT_NODES = 1

#: Feature values that are accepted for --use, in the relative order
#: that the corresponding test stages should always execute in
FEATURES: tuple[FeatureType, ...] = (
Expand Down
27 changes: 27 additions & 0 deletions legate/tester/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
DEFAULT_GPU_DELAY,
DEFAULT_GPU_MEMORY_BUDGET,
DEFAULT_GPUS_PER_NODE,
DEFAULT_NODES,
DEFAULT_NUMAMEM,
DEFAULT_OMPS_PER_NODE,
DEFAULT_OMPTHREADS,
Expand Down Expand Up @@ -197,6 +198,32 @@
help="Number of ranks per node to use",
)

# Multi-node launch options. All three are registered on the same
# ``feature_opts`` group so that they are listed together in ``--help``
# output instead of being split across two sections.
feature_opts.add_argument(
    "--launcher",
    dest="launcher",
    choices=["mpirun", "jsrun", "srun", "none"],
    default="none",
    help='launcher program to use (set to "none" for local runs, or if '
    "the launch has already happened by the time legate is invoked)",
)

# Repeatable option: every occurrence appends one more value to the
# ``launcher_extra`` list, which stays empty when the option is never given.
# Optional arguments are non-required by default, so ``required`` is omitted.
feature_opts.add_argument(
    "--launcher-extra",
    dest="launcher_extra",
    action="append",
    default=[],
    help="additional argument to pass to the launcher (can appear more "
    "than once)",
)

feature_opts.add_argument(
    "--nodes",
    dest="nodes",
    type=int,
    default=DEFAULT_NODES,
    help="Number of nodes to use",
)

test_opts = parser.add_argument_group("Test run configuration options")

Expand Down
3 changes: 3 additions & 0 deletions legate/tester/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ def __init__(self, argv: ArgList) -> None:
self.ompthreads = args.ompthreads
self.numamem = args.numamem
self.ranks = args.ranks
self.launcher = args.launcher
self.launcher_extra = args.launcher_extra
self.nodes = args.nodes

# test run configuration
self.timeout = args.timeout
Expand Down
9 changes: 9 additions & 0 deletions legate/tester/stages/_linux/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,15 @@ def shard_args(self, shard: Shard, config: Config) -> ArgList:
"--ranks-per-node",
str(config.ranks),
]
if config.nodes > 1:
args += [
"--nodes",
str(config.nodes),
]
if config.launcher != "none":
args += ["--launcher", str(config.launcher)]
for extra in config.launcher_extra:
args += ["--launcher-extra=" + str(extra)]
return args

def compute_spec(self, config: Config, system: TestSystem) -> StageSpec:
Expand Down
9 changes: 9 additions & 0 deletions legate/tester/stages/_linux/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,15 @@ def shard_args(self, shard: Shard, config: Config) -> ArgList:
"--ranks-per-node",
str(config.ranks),
]
if config.nodes > 1:
args += [
"--nodes",
str(config.nodes),
]
if config.launcher != "none":
args += ["--launcher", str(config.launcher)]
for extra in config.launcher_extra:
args += ["--launcher-extra=" + str(extra)]
bryevdv marked this conversation as resolved.
Show resolved Hide resolved
return args

def compute_spec(self, config: Config, system: TestSystem) -> StageSpec:
Expand Down
9 changes: 9 additions & 0 deletions legate/tester/stages/_linux/omp.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,15 @@ def shard_args(self, shard: Shard, config: Config) -> ArgList:
"--ranks-per-node",
str(config.ranks),
]
if config.nodes > 1:
args += [
"--nodes",
str(config.nodes),
]
if config.launcher != "none":
args += ["--launcher", str(config.launcher)]
for extra in config.launcher_extra:
args += ["--launcher-extra=" + str(extra)]
return args

def compute_spec(self, config: Config, system: TestSystem) -> StageSpec:
Expand Down
4 changes: 4 additions & 0 deletions tests/unit/legate/tester/test___init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
DEFAULT_GPU_DELAY,
DEFAULT_GPU_MEMORY_BUDGET,
DEFAULT_GPUS_PER_NODE,
DEFAULT_NODES,
DEFAULT_NUMAMEM,
DEFAULT_OMPS_PER_NODE,
DEFAULT_OMPTHREADS,
Expand Down Expand Up @@ -64,6 +65,9 @@ def test_DEFAULT_PROCESS_ENV(self) -> None:
"LEGATE_TEST": "1",
}

def test_DEFAULT_NODES(self) -> None:
    """Pin the default node count to a single node."""
    expected = 1
    assert DEFAULT_NODES == expected

def test_FEATURES(self) -> None:
    """Pin both the accepted feature names and their relative order."""
    expected = ("cpus", "cuda", "eager", "openmp")
    assert FEATURES == expected

Expand Down
14 changes: 14 additions & 0 deletions tests/unit/legate/tester/test_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
DEFAULT_GPU_DELAY,
DEFAULT_GPU_MEMORY_BUDGET,
DEFAULT_GPUS_PER_NODE,
DEFAULT_NODES,
DEFAULT_NUMAMEM,
DEFAULT_OMPS_PER_NODE,
DEFAULT_OMPTHREADS,
DEFAULT_RANKS_PER_NODE,
args as m,
)

Expand Down Expand Up @@ -66,6 +68,18 @@ def test_ompthreads(self) -> None:
def test_numamem(self) -> None:
    """The parser default for ``numamem`` is ``DEFAULT_NUMAMEM``."""
    default = m.parser.get_default("numamem")
    assert default == DEFAULT_NUMAMEM

def test_ranks(self) -> None:
    """The parser default for ``ranks`` is ``DEFAULT_RANKS_PER_NODE``."""
    default = m.parser.get_default("ranks")
    assert default == DEFAULT_RANKS_PER_NODE
bryevdv marked this conversation as resolved.
Show resolved Hide resolved

def test_launcher(self) -> None:
    """Without ``--launcher`` the parser falls back to ``"none"``."""
    default = m.parser.get_default("launcher")
    assert default == "none"

def test_launcher_extra(self) -> None:
    """Without ``--launcher-extra`` the parser default is an empty list."""
    default = m.parser.get_default("launcher_extra")
    assert default == []

def test_nodes(self) -> None:
    """The parser default for ``nodes`` is ``DEFAULT_NODES``."""
    default = m.parser.get_default("nodes")
    assert default == DEFAULT_NODES

def test_timeout(self) -> None:
    """The parser default for ``timeout`` is ``None``."""
    default = m.parser.get_default("timeout")
    assert default is None

Expand Down
6 changes: 6 additions & 0 deletions tests/unit/legate/tester/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,10 @@
DEFAULT_GPU_DELAY,
DEFAULT_GPU_MEMORY_BUDGET,
DEFAULT_GPUS_PER_NODE,
DEFAULT_NODES,
DEFAULT_OMPS_PER_NODE,
DEFAULT_OMPTHREADS,
DEFAULT_RANKS_PER_NODE,
FEATURES,
config as m,
)
Expand Down Expand Up @@ -62,6 +64,10 @@ def test_default_init(self) -> None:
assert c.bloat_factor == DEFAULT_GPU_BLOAT_FACTOR
assert c.omps == DEFAULT_OMPS_PER_NODE
assert c.ompthreads == DEFAULT_OMPTHREADS
assert c.ranks == DEFAULT_RANKS_PER_NODE
assert c.launcher == "none"
assert c.launcher_extra == []
assert c.nodes == DEFAULT_NODES

assert c.timeout is None
assert c.debug is False
Expand Down