Add generalized benchmarking script #459

Merged · 5 commits · May 3, 2024
12 changes: 10 additions & 2 deletions README.md
@@ -487,6 +487,10 @@ To retrieve the current configuration of Devnet, send a GET request to `/config`

## Development

### Installation

Some developer scripts used in this project depend on Python 3, with dependencies specified in `scripts/requirements.txt`. You may want to [install them in a virtual environment](https://docs.python.org/3/library/venv.html#creating-virtual-environments).
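
For example, a minimal setup sketch (assuming a Unix-like shell, `python3` on `PATH`, and running from the repository root; the `.venv` directory name is just an example):

```
$ python3 -m venv .venv
$ source .venv/bin/activate
$ pip install -r scripts/requirements.txt
```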

### Development - Visual Studio Code

It is highly recommended to get familiar with [Visual Studio Code Dev Containers](https://code.visualstudio.com/docs/devcontainers/create-dev-container#_dockerfile) and to install the [rust-analyzer](https://code.visualstudio.com/docs/languages/rust) extension.
@@ -545,13 +549,13 @@ To speed up development, you can put all the previous steps (and more) in a script

### Development - Testing

### Prerequisites
#### Prerequisites

Some tests require the `anvil` command, so you need to [install Foundry](https://book.getfoundry.sh/getting-started/installation). The `anvil` command might not be usable by tests if you run them using VS Code's `Run Test` button available just above the test case. Either run tests from a shell that has foundry/anvil in its `PATH`, or modify the `BackgroundAnvil` command to specify `anvil` by its full path on your system.

To ensure that integration tests pass, be sure to have run `cargo build --release` or `cargo run --release` prior to testing. This builds the production target used in integration tests, so that spawning `BackgroundDevnet` doesn't time out.
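
In practice, a pre-test check might look like this (a sketch; it assumes Foundry is already installed and that the shell running the tests has `anvil` on its `PATH`):

```
$ which anvil            # confirm anvil is reachable from this shell
$ cargo build --release  # build the release target used by integration tests
```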

### Test execution
#### Test execution

Run all tests using all available CPUs with:

@@ -565,6 +569,10 @@ The previous command might cause your testing to die along the way due to memory overuse. In that case, limit the number of parallel jobs:

```
$ cargo test --jobs <N>
```

#### Benchmarking

To check whether your contribution improves execution time, see the script at `scripts/benchmark/command_stat_test.py`.
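
A typical invocation, assuming the Python dependencies from `scripts/requirements.txt` are installed and the command placeholders at the top of the script are filled in, might look like:

```
$ python scripts/benchmark/command_stat_test.py
```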

### Development - Docker

Due to internal needs, images with an arch suffix are built and pushed to Docker Hub, but this is not mentioned in the user docs, as users should NOT need them.
134 changes: 134 additions & 0 deletions scripts/benchmark/command_stat_test.py
@@ -0,0 +1,134 @@
#!/usr/bin/env python

"""
This program tests whether one command performs faster than another, using an independent t-test. At the top of the file,
there are command placeholders which you need to define. You may change other constants
if needed. Optionally, modify `performance_program`, a function used to simulate the
work that is timed.

The program will start the command defined in `ORIGINAL_COMMAND`,
run `performance_program` `SAMPLE_SIZE` times, store these times,
and repeat the same process for `IMPROVED_COMMAND`.
The measured times are then statistically tested and the result is printed.
"""

import subprocess
import time
from typing import List

import requests
from scipy.stats import ttest_ind

DEVNET_PORT = "5050"
DEVNET_URL = f"http://localhost:{DEVNET_PORT}"
REQUEST_TIMEOUT = 2

ORIGINAL_COMMAND: str = ...
"""
The original baseline command used for starting Devnet. E.g.:
```
f"cargo run --release -- --port {DEVNET_PORT}"
```
Be sure to have compiled the program before executing this script, to avoid a startup timeout.
"""

IMPROVED_COMMAND: str = ...
"""
The command used for starting Devnet in improved mode. E.g.:
```
f"cargo run --release -- --port {DEVNET_PORT} --lite-mode"
```
"""

ALTERNATIVE_HYPOTHESIS = "greater"
"""
The null hypothesis is that the two analyzed samples come from equal sources,
i.e. that the two commands under test perform equally fast. The alternative is "greater"
because the original command is supposed to be slower, i.e. yield greater times.
If you want to use this script to test if two commands are different,
change the alternative to "two-sided".
"""

SAMPLE_SIZE = 20


def ensure_process_started(proc: subprocess.Popen):
    """Ensure the process under test is started"""
    max_retries = 20
    for _ in range(max_retries):
        # poll() refreshes returncode; a non-None value means the process already exited
        if proc.poll() is not None:
            raise RuntimeError(f"Process exited with returncode {proc.returncode}")

        try:
            resp = requests.get(f"{DEVNET_URL}/is_alive", timeout=REQUEST_TIMEOUT)
            if resp.status_code == 200:
                return
        except requests.exceptions.ConnectionError:
            pass

        time.sleep(0.5)

    raise RuntimeError("Could not start process")


def performance_program():
    """
    The program whose performance time is measured for sample generation.
    You may completely change the execution logic.
    """
    mint_url = f"{DEVNET_URL}/mint"
    req_body = {"amount": 1, "address": "0x1"}
    for _ in range(500):
        resp = requests.post(mint_url, json=req_body, timeout=REQUEST_TIMEOUT)
        assert resp.status_code == 200


def terminate_and_wait(proc: subprocess.Popen):
    """Terminates the process and waits."""
    proc.terminate()
    proc.wait()


def get_sample(command: str, size: int) -> List[float]:
    """
    Run `command` and run `performance_program` `size` times.
    Returns a list containing `size` measured times.
    """
    total_start_time = time.time()

    times = []

    for _ in range(size):
        with subprocess.Popen(
            command.split(), stdout=subprocess.DEVNULL
        ) as command_proc:
            ensure_process_started(command_proc)

            start_time = time.time()
            performance_program()
            measured_time = time.time() - start_time

            print(f"Measured time: {measured_time}")
            times.append(measured_time)

            terminate_and_wait(command_proc)

    total_time = time.time() - total_start_time
    print(f"Collected sample of size {size} in {total_time:.2f}s")
    return times


def main():
    """Run statistical testing"""

    samples = []
    for command in [ORIGINAL_COMMAND, IMPROVED_COMMAND]:
        print(f"Collecting sample for: {command}")
        samples.append(get_sample(command, SAMPLE_SIZE))

    result = ttest_ind(samples[0], samples[1], alternative=ALTERNATIVE_HYPOTHESIS)
    print(result)
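    # Note: with alternative="greater", a small p-value (conventionally below 0.05) supports
    # the hypothesis that ORIGINAL_COMMAND yields greater (slower) times than IMPROVED_COMMAND.
    # The 0.05 threshold is a common convention, not something enforced by this script.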


if __name__ == "__main__":
    main()
7 changes: 7 additions & 0 deletions scripts/requirements.txt
@@ -0,0 +1,7 @@
certifi==2024.2.2
charset-normalizer==3.3.2
idna==3.7
numpy==1.26.4
requests==2.31.0
scipy==1.13.0
urllib3==2.2.1