# largemodel_unit_test_CI #496

name: largemodel_unit_test_CI
# Runs the unit tests, including the large model tests, on a CUDA machine.

on:
  # Callable from other workflows and startable by hand.
  workflow_call:
  workflow_dispatch:
  # Pushes to mainline, skipped when only Markdown files changed.
  push:
    branches: [mainline]
    paths-ignore: ['**.md']
  # Pull requests that target mainline.
  pull_request:
    branches: [mainline]

# One active run per ref: a newer run cancels the one still in progress.
concurrency:
  group: large-model-unit-tests-${{ github.ref }}
  cancel-in-progress: true

# The workflow token only needs to read repository contents.
permissions:
  contents: read
jobs:
  # Starts a g4dn.2xlarge EC2 instance and registers it as a self-hosted
  # runner. The runner label and instance id are exposed as job outputs so
  # the later jobs can target the runner and stop it.
  Start-Runner:
    name: Start self-hosted EC2 runner
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.LARGEMODELTEST_ACCESSKEY }}
          aws-secret-access-key: ${{ secrets.LARGEMODELTEST_SECRETACCESSKEY }}
          aws-region: ${{ secrets.LARGEMODELTEST_AWS_REGION }}
      - name: Start EC2 runner
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }}
          ec2-instance-type: g4dn.2xlarge
          subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }}
          security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }}

  # Runs on the runner created by Start-Runner: installs Python dependencies,
  # builds and starts a local Vespa, deploys the test application, then runs
  # pytest with --largemodel.
  Test-Marqo:
    name: Run Large Model Unit Tests
    needs: Start-Runner  # required to start the main job when the runner is ready
    runs-on: ${{ needs.start-runner.outputs.label }}  # run the job on the newly created runner
    environment: marqo-test-suite
    steps:
      - name: Checkout marqo repo
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
          path: marqo
      - name: Set up Python 3.8
        uses: actions/setup-python@v3
        with:
          python-version: "3.8"
          cache: "pip"
      - name: Checkout marqo-base for requirements
        uses: actions/checkout@v3
        with:
          repository: marqo-ai/marqo-base
          path: marqo-base
      - name: Install dependencies
        run: |
          pip install -r marqo-base/requirements.txt
          # override base requirements with marqo requirements, if needed:
          pip install -r marqo/requirements.dev.txt
          pip install pytest==7.4.0
      - name: Build Vespa
        run: |
          # Stop and remove unattended-upgrades before using apt.
          # sudo is used on every privileged command, matching the installs below.
          sudo systemctl stop unattended-upgrades
          sudo apt-get remove -y unattended-upgrades
          echo "Updating package list"
          sudo apt-get update -y
          # Build Vespa components
          echo "Installing jdk 17"
          sudo apt-get install openjdk-17-jdk -y
          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
          export PATH=$JAVA_HOME/bin:$PATH
          echo "Installing maven"
          sudo apt-get install maven -y
          echo "Building Vespa components"
          cd marqo/vespa
          mvn clean package
      - name: Start Vespa
        run: |
          # Define these for checking if Vespa is ready
          export VESPA_CONFIG_URL=http://localhost:19071
          export VESPA_DOCUMENT_URL=http://localhost:8080
          export VESPA_QUERY_URL=http://localhost:8080
          cd marqo/scripts/vespa_local
          set -x
          python vespa_local.py start
          set +x
          # Fixed 20-second pause before deploying the application.
          echo "Waiting for Vespa to start"
          for i in {1..20}; do
            echo -ne "Waiting... $i seconds\r"
            sleep 1
          done
          echo -e "\nDone waiting."
          # Zip up schemas and services
          sudo apt-get install zip -y
          zip -r vespa_tester_app.zip services.xml schemas
          # Deploy application with test schema.
          # --fail makes curl exit non-zero on an HTTP error response, so a
          # rejected deployment fails this step here instead of surfacing
          # only as a timeout in the readiness loop below.
          curl --fail --header "Content-Type:application/zip" \
            --data-binary @vespa_tester_app.zip \
            "$VESPA_CONFIG_URL/application/v2/tenant/default/prepareandactivate"
          # wait for vespa to start (document url):
          timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo " Waiting for Vespa document API to be available..."; sleep 10; done;' || \
            (echo "Vespa (Document URL) did not start in time" && exit 1)
          echo "Vespa document API is available. Local Vespa setup complete."
          # Delete the zip file
          rm vespa_tester_app.zip
          echo "Deleted vespa_tester_app.zip"
      - name: Run Large Model Unit Tests
        run: |
          # Define these for use by marqo
          export VESPA_CONFIG_URL=http://localhost:19071
          export VESPA_DOCUMENT_URL=http://localhost:8080
          export VESPA_QUERY_URL=http://localhost:8080
          export MARQO_MAX_CPU_MODEL_MEMORY=15
          export MARQO_MAX_CUDA_MODEL_MEMORY=15
          export PYTHONPATH="./marqo/tests:./marqo/src:./marqo"
          pytest marqo/tests --largemodel --ignore=marqo/tests/test_documentation.py

  # Stops the EC2 runner started by Start-Runner, whether or not the
  # earlier jobs succeeded.
  Stop-Runner:
    name: Stop self-hosted EC2 runner
    needs:
      - Start-Runner  # required to get output from the start-runner job
      - Test-Marqo  # required to wait when the main job is done
    runs-on: ubuntu-latest
    if: ${{ always() }}  # required to stop the runner even if the error happened in the previous jobs
    steps:
      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v1
        with:
          aws-access-key-id: ${{ secrets.LARGEMODELTEST_ACCESSKEY }}
          aws-secret-access-key: ${{ secrets.LARGEMODELTEST_SECRETACCESSKEY }}
          aws-region: ${{ secrets.LARGEMODELTEST_AWS_REGION }}
      - name: Stop EC2 runner
        uses: machulav/ec2-github-runner@v2
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-runner.outputs.label }}
          ec2-instance-id: ${{ needs.start-runner.outputs.ec2-instance-id }}