largemodel_unit_test_CI #496

Workflow file for this run

.github/workflows/largemodel_unit_test_CI.yml at 9d2452f

	# Runs the unit-test suite, including the large-model tests, on a CUDA machine.
	name: largemodel_unit_test_CI

	# Triggers: reusable-workflow calls, manual dispatch, pushes to mainline
	# (pushes that only touch Markdown files are ignored), and pull requests
	# targeting mainline.
	on:
	  workflow_call:
	  workflow_dispatch:
	  push:
	    branches:
	      - mainline
	    paths-ignore:
	      - '**.md'
	  pull_request:
	    branches:
	      - mainline

	# At most one run per ref; a newer run cancels the one still in progress.
	concurrency:
	  group: large-model-unit-tests-${{ github.ref }}
	  cancel-in-progress: true

	# Restrict the workflow token to read-only access to repository contents.
	permissions:
	  contents: read

	jobs:
	  # Starts the EC2 instance that acts as the self-hosted runner for the tests.
	  Start-Runner:
	    name: Start self-hosted EC2 runner
	    runs-on: ubuntu-latest
	    # Exposed so later jobs can target the runner and stop the instance.
	    outputs:
	      label: ${{ steps.start-ec2-runner.outputs.label }}
	      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
	    steps:
	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@v1
	        with:
	          aws-access-key-id: ${{ secrets.LARGEMODELTEST_ACCESSKEY }}
	          aws-secret-access-key: ${{ secrets.LARGEMODELTEST_SECRETACCESSKEY }}
	          aws-region: ${{ secrets.LARGEMODELTEST_AWS_REGION }}
	      - name: Start EC2 runner
	        id: start-ec2-runner
	        uses: machulav/ec2-github-runner@v2
	        with:
	          mode: start
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          ec2-image-id: ${{ vars.MARQO_CUDA_TESTS_INSTANCE_AMI }}
	          ec2-instance-type: g4dn.2xlarge
	          subnet-id: ${{ secrets.LARGEMODELTEST_SUBNET_ID }}
	          security-group-id: ${{ secrets.LARGEMODELTEST_SECURITY_GROUP }}

	  # Builds and starts a local Vespa, then runs the large-model test suite
	  # on the runner created by Start-Runner.
	  Test-Marqo:
	    name: Run Large Model Unit Tests
	    needs: Start-Runner  # required to start the main job when the runner is ready
	    # Run the job on the newly created runner; the job id is spelled exactly
	    # as declared above.
	    runs-on: ${{ needs.Start-Runner.outputs.label }}
	    environment: marqo-test-suite
	    steps:
	      - name: Checkout marqo repo
	        uses: actions/checkout@v3
	        with:
	          fetch-depth: 0
	          path: marqo

	      - name: Set up Python 3.8
	        uses: actions/setup-python@v3
	        with:
	          python-version: "3.8"
	          cache: "pip"

	      - name: Checkout marqo-base for requirements
	        uses: actions/checkout@v3
	        with:
	          repository: marqo-ai/marqo-base
	          path: marqo-base

	      - name: Install dependencies
	        run: |
	          pip install -r marqo-base/requirements.txt
	          # override base requirements with marqo requirements, if needed:
	          pip install -r marqo/requirements.dev.txt
	          pip install pytest==7.4.0

	      - name: Build Vespa
	        run: |
	          # NOTE(review): the next three commands run without sudo while the
	          # installs below use sudo; presumably the runner executes as root.
	          # Confirm before changing.
	          systemctl stop unattended-upgrades
	          apt-get remove -y unattended-upgrades

	          echo "Updating package list"
	          apt-get update -y

	          # Build Vespa components
	          echo "Installing jdk 17"
	          sudo apt-get install openjdk-17-jdk -y
	          export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
	          export PATH=$JAVA_HOME/bin:$PATH
	          echo "Installing maven"
	          sudo apt-get install maven -y
	          echo "Building Vespa components"
	          cd marqo/vespa
	          mvn clean package

	      - name: Start Vespa
	        run: |
	          # Define these for checking if Vespa is ready
	          export VESPA_CONFIG_URL=http://localhost:19071
	          export VESPA_DOCUMENT_URL=http://localhost:8080
	          export VESPA_QUERY_URL=http://localhost:8080

	          cd marqo/scripts/vespa_local
	          set -x
	          python vespa_local.py start
	          set +x

	          # Fixed 20-second pause before deploying the application package
	          echo "Waiting for Vespa to start"
	          for i in {1..20}; do
	            echo -ne "Waiting... $i seconds\r"
	            sleep 1
	          done
	          echo -e "\nDone waiting."

	          # Zip up schemas and services
	          sudo apt-get install zip -y
	          zip -r vespa_tester_app.zip services.xml schemas

	          # Deploy application with test schema
	          curl --header "Content-Type:application/zip" --data-binary @vespa_tester_app.zip http://localhost:19071/application/v2/tenant/default/prepareandactivate

	          # wait for vespa to start (document url):
	          # polls every 10 seconds and fails the step if not reachable within 10 minutes
	          timeout 10m bash -c 'until curl -f -X GET $VESPA_DOCUMENT_URL >/dev/null 2>&1; do echo " Waiting for Vespa document API to be available..."; sleep 10; done;' || \
	            (echo "Vespa (Document URL) did not start in time" && exit 1)

	          echo "Vespa document API is available. Local Vespa setup complete."

	          # Delete the zip file
	          rm vespa_tester_app.zip
	          echo "Deleted vespa_tester_app.zip"

	      - name: Run Large Model Unit Tests
	        run: |
	          # Define these for use by marqo
	          export VESPA_CONFIG_URL=http://localhost:19071
	          export VESPA_DOCUMENT_URL=http://localhost:8080
	          export VESPA_QUERY_URL=http://localhost:8080
	          export MARQO_MAX_CPU_MODEL_MEMORY=15
	          export MARQO_MAX_CUDA_MODEL_MEMORY=15

	          export PYTHONPATH="./marqo/tests:./marqo/src:./marqo"
	          pytest marqo/tests --largemodel --ignore=marqo/tests/test_documentation.py

	  # Stops the EC2 runner once testing is over, whatever the outcome.
	  Stop-Runner:
	    name: Stop self-hosted EC2 runner
	    needs:
	      - Start-Runner  # required to get output from the Start-Runner job
	      - Test-Marqo  # required to wait when the main job is done
	    runs-on: ubuntu-latest
	    if: ${{ always() }}  # required to stop the runner even if the error happened in the previous jobs
	    steps:
	      - name: Configure AWS credentials
	        uses: aws-actions/configure-aws-credentials@v1
	        with:
	          aws-access-key-id: ${{ secrets.LARGEMODELTEST_ACCESSKEY }}
	          aws-secret-access-key: ${{ secrets.LARGEMODELTEST_SECRETACCESSKEY }}
	          aws-region: ${{ secrets.LARGEMODELTEST_AWS_REGION }}
	      - name: Stop EC2 runner
	        uses: machulav/ec2-github-runner@v2
	        with:
	          mode: stop
	          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
	          label: ${{ needs.Start-Runner.outputs.label }}
	          ec2-instance-id: ${{ needs.Start-Runner.outputs.ec2-instance-id }}

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

largemodel_unit_test_CI #496

Workflow file

largemodel_unit_test_CI #496

Jobs

Run details

Workflow file for this run