diff --git a/.gitignore b/.gitignore index 9bc59cf..3e29bf8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,28 @@ -*.pyc +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Distribution / packaging +/build/ /dist/ -/*.egg-info -.tox -.cache -/.vscode/ +*.egg +*.eggs +*.egg-info/ +MANIFEST + +# For Visual Studio Code +.vscode/ + +# Mac .DS_Store -/build/ \ No newline at end of file + +# Unit test / coverage reports +.[nt]ox/ +htmlcov/ +.coverage +.coverage.* +.*cache +nosetests.xml +coverage.xml +*.cover diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..3bb7b03 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,41 @@ +default_language_version: + python: python3.6 +default_stages: [commit] + +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-added-large-files + - id: check-yaml + - id: check-case-conflict +- repo: https://github.com/pycqa/isort + rev: 5.8.0 + hooks: + - id: isort + name: isort + args: ["--profile", "black"] +- repo: https://github.com/psf/black + rev: 20.8b1 + hooks: + - id: black +- repo: https://gitlab.com/pycqa/flake8 + rev: 3.8.4 + hooks: + - id: flake8 +- repo: local + hooks: + - id: pylint + name: pylint + entry: pylint + language: python + types: [python] + files: scaleapi/ + additional_dependencies: + - 'pylint>=2.7.4' + - 'requests>=2.25.0' + - 'urllib3>=1.26.0' + - 'pytest>=6.2.2' + language_version: python3.6 diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..8c3132a --- /dev/null +++ b/.pylintrc @@ -0,0 +1,8 @@ +[MASTER] +disable= + missing-module-docstring, + too-few-public-methods, + too-many-locals, + too-many-arguments, + too-many-instance-attributes, + invalid-name, diff --git a/MANIFEST b/MANIFEST deleted file mode 100644 index 45be37e..0000000 --- a/MANIFEST +++ /dev/null @@ -1,5 +0,0 @@ -# file GENERATED by distutils, do 
NOT edit -setup.cfg -setup.py -scaleapi/__init__.py -scaleapi/tasks.py diff --git a/README.rst b/README.rst index 96bebb6..95cf508 100644 --- a/README.rst +++ b/README.rst @@ -1,6 +1,22 @@ -===================== +********************* Scale AI | Python SDK -===================== +********************* + +If you use earlier versions of the SDK, please refer to `v1.0.4 documentation `_. + +If you are migrating from earlier versions to v2, please refer to `Migration Guide to v2 `_. + +|pic1| |pic2| |pic3| + +.. |pic1| image:: https://pepy.tech/badge/scaleapi/month + :alt: Downloads + :target: https://pepy.tech/project/scaleapi +.. |pic2| image:: https://img.shields.io/pypi/pyversions/scaleapi.svg + :alt: Supported Versions + :target: https://pypi.org/project/scaleapi +.. |pic3| image:: https://img.shields.io/github/contributors/scaleapi/scaleapi-python-client.svg + :alt: Contributors + :target: https://github.com/scaleapi/scaleapi-python-client/graphs/contributors Installation ____________ @@ -9,8 +25,6 @@ ____________ $ pip install --upgrade scaleapi -Note: We strongly suggest using `scaleapi` with Python version 2.7.9 or greater due to SSL issues with prior versions. - Usage _____ @@ -23,11 +37,11 @@ Tasks _____ Most of these methods will return a `scaleapi.Task` object, which will contain information -about the json response (task_id, status, etc.). +about the json response (task_id, status, params, response, etc.). Any parameter available in `Scale's API documentation`__ can be passed as an argument option with the corresponding type. -__ https://docs.scale.com/reference#task-object +__ https://docs.scale.com/reference#tasks-object-overview The following endpoints for tasks are available: @@ -38,15 +52,18 @@ This method can be used for any Scale supported task type using the following fo .. code-block:: python - client.create_{{Task Type}}_task(...) + client.create_task(TaskType, ...task parameters...) Passing in the applicable values into the function definition. 
The applicable fields and further information for each task type can be found in `Scale's API documentation`__. -__ https://docs.scale.com/reference#general-image-annotation +__ https://docs.scale.com/reference .. code-block:: python - client.create_imageannotation_task( + from scaleapi.tasks import TaskType + + client.create_task( + TaskType.ImageAnnotation, project = 'test_project', callback_url = "http://www.example.com/callback", instruction= "Draw a box around each baby cow and big cow.", @@ -61,8 +78,8 @@ __ https://docs.scale.com/reference#general-image-annotation } ) -Retrieve task -^^^^^^^^^^^^^ +Retrieve a task +^^^^^^^^^^^^^^^ Retrieve a task given its id. Check out `Scale's API documentation`__ for more information. @@ -70,42 +87,56 @@ __ https://docs.scale.com/reference#retrieve-tasks .. code-block :: python - task = client.fetch_task('asdfasdfasdfasdfasdfasdf') - print(task.status) // Task status ('pending', 'completed', 'error', 'canceled') - print(task.response) // If task is complete + task = client.get_task('30553edd0b6a93f8f05f0fee') + print(task.status) # Task status ('pending', 'completed', 'error', 'canceled') + print(task.response) # If task is complete List Tasks ^^^^^^^^^^ -Retrieve a list of tasks, with optional filter by start and end date/time. Paginated with `next_token`. The return value is a `scaleapi.Tasklist`, which acts as a list, but also has fields for the total number of tasks, the limit and offset, and whether or not there's more. Check out `Scale's API documentation`__ for more information. +Retrieve a list of `Task` objects, with filters for: ``project_name``, ``batch_name``, ``type``, ``status``, +``review_status``, ``unique_id``, ``completed_after``, ``completed_before``, ``updated_after``, ``updated_before``, +``created_after``, ``created_before`` and ``tags``. + +``get_tasks()`` is a **generator** method and yields ``Task`` objects. 
+ +`A generator is another type of function, returns an iterable that you can loop over like a list. +However, unlike lists, generators do not store the content in the memory. +That helps you to process a large number of objects without increasing memory usage.` + +If you will iterate through the tasks and process them once, using a generator is the most efficient method. +However, if you need to process the list of tasks multiple times, you can wrap the generator in a ``list(...)`` +statement, which returns a list of Tasks by loading them into the memory. + +Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#list-multiple-tasks .. code-block :: python - next_token = None; - counter = 0 - all_tasks =[] - while True: - tasks = client.tasks( - start_time = "2020-09-08", - end_time = "2021-01-01", - customer_review_status = "accepted", - next_token = next_token, - ) - for task in tasks: - counter += 1 - print('Downloading Task %s | %s' % (counter, task.task_id)) - all_tasks.append(task.__dict__['param_dict']) - next_token = tasks.next_token - if next_token is None: - break - print(all_tasks) + from scaleapi.tasks import TaskReviewStatus, TaskStatus + + tasks = client.get_tasks( + project_name = "My Project", + created_after = "2020-09-08", + completed_before = "2021-04-01", + status = TaskStatus.Completed, + review_status = TaskReviewStatus.Accepted + ) + + # Iterating through the generator + for task in tasks: + # Download task or do something! + print(task.task_id) + + # Alternatively, retrieve results as a Task list (a generator can be consumed only once) + task_list = list(tasks) + print(f"{len(task_list)} tasks retrieved") Cancel Task ^^^^^^^^^^^ -Cancel a task given its id if work has not started on the task (task status is `Queued` in the UI). Check out `Scale's API documentation`__ for more information. +Cancel a task given its id if work has not started on the task (task status is ``Queued`` in the UI). 
Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#cancel-task @@ -153,8 +184,13 @@ __ https://docs.scale.com/reference#batch-status client.batch_status(batch_name = 'batch_name_01_07_2021') -Retrieve Batch -^^^^^^^^^^^^^^ + # Alternative via Batch.get_status() + batch = client.get_batch('batch_name_01_07_2021') + batch.get_status() # Refreshes tasks_{status} attributes of Batch + print(batch.tasks_pending, batch.tasks_completed) + +Retrieve A Batch +^^^^^^^^^^^^^^^^ Retrieve a single Batch. Check out `Scale's API documentation`__ for more information. @@ -167,27 +203,37 @@ __ https://docs.scale.com/reference#batch-retrieval List Batches ^^^^^^^^^^^^ -Retrieve a list of Batches. Check out `Scale's API documentation`__ for more information. +Retrieve a list of Batches. Optional parameters are ``project_name``, ``batch_status``, ``created_after`` and ``created_before``. + +``get_batches()`` is a **generator** method and yields ``Batch`` objects. + +`A generator is another type of function, returns an iterable that you can loop over like a list. +However, unlike lists, generators do not store the content in the memory. +That helps you to process a large number of objects without increasing memory usage.` + +When wrapped in a ``list(...)`` statement, it returns a list of Batches by loading them into the memory. + +Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#batch-list .. 
code-block :: python - next_token = None; + from scaleapi.batches import BatchStatus + + batches = client.get_batches( + batch_status=BatchStatus.Completed, + created_after = "2020-09-08" + ) + counter = 0 - all_batchs =[] - while True: - batches = client.list_batches( - status = "completed" - ) - for batch in batches: - counter += 1 - print('Downloading Batch %s | %s | %s' % (counter, batch.name, batch.param_dict['status'])) - all_batchs.append(batch.__dict__['param_dict']) - next_token = batches.next_token - if next_token is None: - break - print(all_batchs) + for batch in batches: + counter += 1 + print(f'Downloading batch {counter} | {batch.name} | {batch.project}') + + # Alternative for accessing as a Batch list + batch_list = list(batches) + print(f"{len(batch_list)} batches retrieved") Projects ________ @@ -221,7 +267,7 @@ __ https://docs.scale.com/reference#project-retrieval List Projects ^^^^^^^^^^^^^ -This function does not take any arguments. Retrieve a list of every Project. +This function does not take any arguments. Retrieve a list of every Project. Check out `Scale's API documentation`__ for more information. __ https://docs.scale.com/reference#batch-list @@ -232,7 +278,7 @@ __ https://docs.scale.com/reference#batch-list projects = client.projects() for project in projects: counter += 1 - print('Downloading project %s | %s | %s' % (counter, project['name'], project['type'])) + print(f'Downloading project {counter} | {project.name} | {project.type}') Update Project ^^^^^^^^^^^^^^ @@ -245,7 +291,7 @@ __ https://docs.scale.com/reference#project-update-parameters data = client.update_project( project_name='test_project', - pathc = false, + patch = False, instruction='update: Please label all the stuff', ) @@ -253,15 +299,30 @@ Error handling ______________ If something went wrong while making API calls, then exceptions will be raised automatically -as a `scaleapi.ScaleException` or `scaleapi.ScaleInvalidRequest` runtime error. 
For example: +as a `ScaleException` parent type and child exceptions: + +- ``ScaleInvalidRequest``: 400 - Bad Request -- The request was unacceptable, often due to missing a required parameter. +- ``ScaleUnauthorized``: 401 - Unauthorized -- No valid API key provided. +- ``ScaleNotEnabled``: 402 - Not enabled -- Please contact sales@scaleapi.com before creating this type of task. +- ``ScaleResourceNotFound``: 404 - Not Found -- The requested resource doesn't exist. +- ``ScaleDuplicateTask``: 409 - Conflict -- The provided idempotency key or unique_id is already in use for a different request. +- ``ScaleTooManyRequests``: 429 - Too Many Requests -- Too many requests hit the API too quickly. +- ``ScaleInternalError``: 500 - Internal Server Error -- We had a problem with our server. Try again later +- ``ScaleTimeoutError``: 504 - Server Timeout Error -- Try again later. + +Check out `Scale's API documentation `_ for more details. + +For example: .. code-block:: python - try - client.create_categorization_task('Some parameters are missing.') - except scaleapi.ValidationError as e: - print(e.code) # 400 - print(e.message) # missing param X + from scaleapi.exceptions import ScaleException + + try: + client.create_task(TaskType.TextCollection, attachment='Some parameters are missing.') + except ScaleException as err: + print(err.code) # 400 + print(err.message) # Parameter is invalid, reason: "attachments" is required Troubleshooting _______________ diff --git a/docs/dev_requirements.txt b/docs/dev_requirements.txt new file mode 100644 index 0000000..8a5d25e --- /dev/null +++ b/docs/dev_requirements.txt @@ -0,0 +1,6 @@ +black>=19.10b0 +flake8>=3.8.4 +pre-commit==2.11.1 +isort>=5.7.0 +pytest>=6.2.2 +pylint>=2.7.2 diff --git a/docs/developer_guide.md b/docs/developer_guide.md new file mode 100644 index 0000000..50f44c6 --- /dev/null +++ b/docs/developer_guide.md @@ -0,0 +1,104 @@ +# Developer Guide for Python SDK + +### 1. 
Clone repo + +Clone git repo into your local machine. + +```bash +$ git clone git@github.com:scaleapi/scaleapi-python-client.git +``` + +### 2. Install required packages + +If you use a virtual environment (via venv or conda), please activate it before installing the following packages. + +_Python SDK v2+ supports only Python 3.6+_ + +```bash +$ pip install -r docs/dev_requirements.txt +``` +### 3. Setup pre-commit + +Ensure `pre-commit`[1] is installed: +```bash +$ pre-commit --version +# pre-commit 2.11.1 +``` + +Configure pre-commit for the repo: +```bash +pre-commit install +``` +Now `pre-commit` will run automatically on `git commit`! + +### 4. (Optional) VS Code Settings + +Press `Cmd+Shift+P` to open Command Palette on VSCode to find **Preferences: Open Settings (JSON)**. + +If you want to make those settings only apply to current workspace (not VS Code general), choose **Preferences: Open Workspace Settings (JSON)** + +- Enables `pylint`[2] and `flake8`[3] as linters together +- Auto-formats python files on save according to `black` + +Append following lines to the json file: +```json +"python.linting.enabled": true, +"python.linting.pylintEnabled": true, +"python.linting.flake8Enabled": true, +"python.formatting.provider": "black", +"[python]": { + "editor.formatOnSave": true, + "editor.defaultFormatter": "ms-python.python" + }, +``` + +### 5. Running pre-commit Tests Manually + +You can run following command to run pre-commit linter for all files, without a commit. It provides a report for issues as well as fixes formatting. 
+ +```bash +$ pre-commit run --all-files + +Trim Trailing Whitespace.................................................Passed +Fix End of Files.........................................................Passed +Check for added large files..............................................Passed +Check Yaml...............................................................Passed +Check for case conflicts.................................................Passed +isort....................................................................Passed +black....................................................................Passed +flake8...................................................................Passed +pylint...................................................................Passed +``` + +### 6. Running pytest Test Cases + +Before pushing your code changes, you can run `pytest` to test existing cases. You can add new test cases if you're adding a new method or functionality to be tested. + +In order to run `pytest` you need to set environment variable `SCALE_TEST_API_KEY` as your Scale user's test key. + +```bash +$ SCALE_TEST_API_KEY="{apikey}|{userid}|test" python3 -m pytest -v + +================================ test session starts ================================ +platform darwin -- Python 3.6.12, pytest-6.2.2, py-1.10.0, pluggy-0.13.1 +cachedir: .pytest_cache +rootdir: /Users/git/scaleapi-python-client +collected 31 items + +tests/test_client.py::test_invalidkey_fail PASSED [ 3%] +tests/test_client.py::test_uniquekey_fail PASSED [ 6%] +tests/test_client.py::test_categorize_ok PASSED [ 9%] +tests/test_client.py::test_categorize_fail PASSED [ 12%] +tests/test_client.py::test_transcription_ok PASSED [ 16%] +......... +``` + +#### 7. Deployment and Publishing of a new version + +Please refer to [Deployment and Publishing Guide](pypi_update_guide.md) for details. 
+_____ +[1] pre-commit configuration is available in `.pre-commit-config.yaml` + +[2] Pylint configuration is available in `.pylintrc` + +[3] flake8 configuration is available in `setup.cfg` diff --git a/docs/migration_guide.md b/docs/migration_guide.md new file mode 100644 index 0000000..300f19c --- /dev/null +++ b/docs/migration_guide.md @@ -0,0 +1,88 @@ + +# SDK Migration Guide to v2 + + +If you are migrating from v0 or v1, this guide explains how to update your application for compatibility with v2. We recommend migrating as soon as possible to ensure that your application is unaffected. + +### Creating New Tasks + +Methods with task types such as `create_imageannotation_task`, `create_textcollection_task` etc. are ~~**deprecated**~~. + +Creating a new task is now unified under the `create_task(TaskType, ...)` method. Please review [Create Task](../README.rst#create-task) section for more details. + + +```python +# Deprecated Old Method +client.create_imageannotation_task( + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... +) + +# New Method +from scaleapi.tasks import TaskType + +client.create_task( + TaskType.ImageAnnotation, + project = 'test_project', + instruction= "Draw a box around each baby cow and big cow.", + ... +) +``` + +### Retrieving Tasks + +A new generator method is introduced to retrieve a list of tasks with all available parameters. The new method `get_tasks(...)` handles pagination and tokens. +You can have a simpler code by replacing `tasks()` loops with a single `get_tasks()` call. + +Please refer to [List Tasks](../README.rst#list-tasks) for more details. + +### Accessing Attributes (Task, Batch, Project) + +The old `param_dict` attribute is now replaced with a method `as_dict()` to return an object's attributes as a dictionary. 
+ +First-level attributes of Task are still accessible with `.` notation as the following: + +```python +task.status # same as task.as_dict()["status"] +task.params["geometries"] # same as task.as_dict()["params"]["geometries"] +task.response["annotations"] # same as task.as_dict()["response"]["annotations"] +``` + +Accessing `task.params` child objects directly at task level is ~~**deprecated**~~. Instead of `task.attribute`, you should use `task.params["attribute"]` for accessing objects under `params`. + +```python +task.params["geometries"] # DEPRECATED access => task.geometries +task.params["attachment"] # DEPRECATED access => task.attachment +``` + +### Task Counts as Summary of Batch + +Attributes of Batch `pending`, `completed`, `error`, `canceled` are replaced with `tasks_pending`, `tasks_completed`, `tasks_error`, `tasks_canceled` respectively to avoid confusion. + +```python +# NEW Attributes # DEPRECATED Attributes + +batch.tasks_pending # batch.pending +batch.tasks_completed # batch.completed +batch.tasks_error # batch.error +batch.tasks_canceled # batch.canceled +``` + +### Deprecated Methods + +- `fetch_task()` replaced with `get_task()` +- `list_batches()` replaced with `get_batches()` + +### Enabled Auto-Retry + +SDK now supports auto-retry in case a `TimeOut(504)` or `TooManyRequests(429)` error occurs. + +### New Exceptions + +New error types are introduced if you want to handle specific exception cases. +`ScaleInvalidRequest`, `ScaleUnauthorized`, `ScaleNotEnabled`, `ScaleResourceNotFound`, `ScaleDuplicateTask`, `ScaleTooManyRequests`, `ScaleInternalError` and `ScaleTimeoutError`. + +All new error types are children of the existing `ScaleException` which can be used to handle all cases. + +Please review [Error handling](../README.rst#error-handling) section for more details. 
diff --git a/docs/pypi_update_guide.md b/docs/pypi_update_guide.md new file mode 100644 index 0000000..a89d4dc --- /dev/null +++ b/docs/pypi_update_guide.md @@ -0,0 +1,39 @@ +# Deployment and Publishing Guide for Python SDK + +_Creating and deploying a new package version is easy!_ + +### Prerequisites + +1. Ensure you're on the latest `master` branch + +2. Ensure you have access to a PyPI account that is a maintainer of [scaleapi](https://pypi.org/project/scaleapi/) on PyPI + +### Deployment Steps: + +**Step 0: Critical - Bump Project Version** + +Ensure `_version.py` has an updated project version. If not, please increment the project version, commit and push the changes. + +We use [semantic versioning](https://packaging.python.org/guides/distributing-packages-using-setuptools/#semantic-versioning-preferred). If you are adding a meaningful feature, bump the minor version. If you are fixing a bug, bump the incremental version. + +**Step 1: Run Publish Script** + +```bash +./publish.sh +``` + +If you want to run test cases via `pytest` before publishing, add the _optional_ `runtest` arg to the script. + +You need to set your own test key as `SCALE_TEST_API_KEY` environment variable before running. + +```bash +SCALE_TEST_API_KEY="{apikey}|{userid}|test" ./publish.sh runtest +``` + +**Step 2: Check out the PyPI page to ensure all looks good** + +[https://pypi.org/project/scaleapi/](https://pypi.org/project/scaleapi/) + +**Step 3: Create a New Release** + +Create a [new release](https://github.com/scaleapi/scaleapi-python-client/releases/new) on GitHub with a matching version tag _(i.e. v2.0.1)_. Please provide a summary about new features and fixed bugs in the Release Notes. 
diff --git a/publish.sh b/publish.sh new file mode 100755 index 0000000..4fbceca --- /dev/null +++ b/publish.sh @@ -0,0 +1,85 @@ +#!/bin/bash +echo "##### STARTING BUILD and PUBLISH #####" + +VERSION_FILE="scaleapi/_version.py" + +staged_files=$(git diff --cached --name-only --diff-filter=ACMR ${VERSION_FILE}) +changed_files=$(git diff --name-only --diff-filter=ACMR ${VERSION_FILE}) + +if [[ "$staged_files" == "$VERSION_FILE" || "$changed_files" == "$VERSION_FILE" ]]; +then + echo "ERROR: You have uncommitted changes in version file: ${VERSION_FILE}" + echo " Please commit and push your changes before publishing." + exit +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +cd "${DIR}" || exit 1 + +BRANCH_NAME=$(git branch 2>/dev/null | grep '^*' | tr -d ' *') +echo "Active Git Branch: ${BRANCH_NAME}" # release-1.0.5 + +# IFS='-' read -ra strarr <<< "$BRANCH_NAME" +# BRANCH_PREFIX="${strarr[0]}" # release +# BRANCH_VERSION="${strarr[1]}" # 1.0.5 + +while IFS= read -r line; do + if [[ $line == __version__* ]]; + then + IFS=' = ' read -ra strarr <<< "$line" + PKG_VERSION=$( sed -e 's/^"//' -e 's/"$//' <<< "${strarr[1]}" ) + echo "SDK Package Version: ${PKG_VERSION}" + break + fi +done < "${DIR}/${VERSION_FILE}" + +if [ "$BRANCH_NAME" != "master" ]; +then + echo "ERROR: You need to be in 'master' git branch to publish this version (${PKG_VERSION})." + exit 1 +fi + +if [ "$1" == "runtest" ]; +then + echo "Validating environment for pytest..." + if ! pip show pytest > /dev/null 2>&1; + then + echo "WARN: 'pytest' package is not found, installing..."; + pip install pytest + fi + + if [[ -z "${SCALE_TEST_API_KEY}" ]]; then + echo "Test key not found. Please assign 'SCALE_TEST_API_KEY=...' as your test environment key." + exit 1 + fi + + if ! python -m pytest; then echo "ERROR: pytest failed."; exit; fi + echo "pytest is successful!" +fi + +# Clean-up build and dist folders +rm -rf build/ dist/ + +# Build package +echo "Building package..." + +if ! 
python3 setup.py sdist bdist_wheel > /dev/null 2>&1; then echo "ERROR: Package building failed."; exit 1; fi + + +if ! pip show twine > /dev/null 2>&1; +then + echo "WARN: 'twine' package is not found, installing..."; + pip install twine +fi + +# Twine Validation +echo "Validating package..." + +if ! twine check --strict dist/* ; then echo "ERROR: Twine check failed."; exit 1; fi + +# Twine Upload to Pypi +echo "Uploading package..." + +if ! twine upload dist/*; then echo "ERROR: Twine upload failed."; exit 1; fi + +exit 0; diff --git a/pypi_update_guide.md b/pypi_update_guide.md deleted file mode 100644 index 000329f..0000000 --- a/pypi_update_guide.md +++ /dev/null @@ -1,51 +0,0 @@ -_Creating and deploying a new package version is easy_ - -### Prerequisites - -1. Ensure you're on the latest master - -2. Ensure you have a PyPI account created and are added as a Collaborator - -### Deployment Steps: - -**Step 0: Critical - Bump Project Version** - -In `_version.py`, you need to specify a new project version. - -We use [semantic versioning](https://packaging.python.org/guides/distributing-packages-using-setuptools/#semantic-versioning-preferred). If you are adding a meaningful feature, bump the minor version. If you are fixing a bug, bump the incremental version. - -**Step 1: Remove Previous Versions** - -Clear out any previously packages and files in the `dist` and `build/lib` folders - -**Step 2: Create a Source Distribution** - -``` -python3 setup.py sdist -``` - -**Step 3: Create `wheel`** - -You should also create a wheel for your project. A wheel is a built package that can be installed without needing to go through the “build” process. 
Installing wheels is substantially faster for the end user than installing from a source distribution - -``` -python3 setup.py bdist_wheel -``` - -**Step 4: Install Twine** - -Twine is what is used to manage PyPI pacakges - -``` -pip3 install twine -``` - -**Step 5: Upload distribution to PyPI** - -``` -twine upload dist/* -``` - -**Step 6: Check out the PyPI page to ensure all looks good** - -[https://pypi.org/project/scaleapi/](https://pypi.org/project/scaleapi/) diff --git a/scaleapi/__init__.py b/scaleapi/__init__.py index e4f7ccc..7d80431 100644 --- a/scaleapi/__init__.py +++ b/scaleapi/__init__.py @@ -1,52 +1,29 @@ -import requests -import platform -import urllib.parse - -from .tasks import Task -from .batches import Batch -from .projects import Project -from ._version import __version__ - -TASK_TYPES = [ - 'annotation', - 'audiotranscription', - 'categorization', - 'comparison', - 'cuboidannotation', - 'datacollection', - 'imageannotation', - 'lineannotation', - 'namedentityrecognition', - 'pointannotation', - 'polygonannotation', - 'segmentannotation', - 'transcription', - 'textcollection', - 'documenttranscription', - 'videoannotation', - 'videoboxannotation', - 'videoplaybackannotation', - 'videocuboidannotation' -] -SCALE_ENDPOINT = 'https://api.scale.com/v1/' -DEFAULT_LIMIT = 100 -DEFAULT_OFFSET = 0 - - -class ScaleException(Exception): - def __init__(self, message, errcode): - super(ScaleException, self).__init__( - ' {}'.format(errcode, message)) - self.code = errcode - - -class ScaleInvalidRequest(ScaleException, ValueError): - pass - - -class Paginator(list): - def __init__(self, docs, total, limit, offset, has_more, next_token=None): - super(Paginator, self).__init__(docs) +from typing import Dict, Generator, Generic, List, TypeVar, Union + +from scaleapi.batches import Batch, BatchStatus +from scaleapi.exceptions import ScaleInvalidRequest +from scaleapi.projects import Project + +from ._version import __version__ # noqa: F401 +from .api import 
Api +from .tasks import Task, TaskReviewStatus, TaskStatus, TaskType + +T = TypeVar("T") + + +class Paginator(list, Generic[T]): + """Paginator for list endpoints""" + + def __init__( + self, + docs: List[T], + total: int, + limit: int, + offset: int, + has_more: bool, + next_token=None, + ): + super().__init__(docs) self.docs = docs self.total = total self.limit = limit @@ -55,204 +32,558 @@ def __init__(self, docs, total, limit, offset, has_more, next_token=None): self.next_token = next_token -class Tasklist(Paginator): - pass +class Tasklist(Paginator[Task]): + """Tasks Paginator""" -class Batchlist(Paginator): - pass +class Batchlist(Paginator[Batch]): + """Batches Paginator""" -class ScaleClient(object): - def __init__(self, api_key, user_agent_extension=None): - self.api_key = api_key - self._headers = { - "Content-Type": "application/json", - "User-Agent": _generate_useragent(user_agent_extension) - } +class ScaleClient: + """Main class serves as an interface for Scale API""" - def _getrequest(self, endpoint, params=None): - """Makes a get request to an endpoint. + def __init__(self, api_key, source=None): + self.api = Api(api_key, source) - If an error occurs, assumes that endpoint returns JSON as: - { 'status_code': XXX, - 'error': 'I failed' } - """ - params = params or {} - r = requests.get(SCALE_ENDPOINT + endpoint, - headers=self._headers, - auth=(self.api_key, ''), params=params) - - if r.status_code == 200: - return r.json() - else: - try: - error = r.json()['error'] - except ValueError: - error = r.text - if r.status_code == 400: - raise ScaleInvalidRequest(error, r.status_code) - else: - raise ScaleException(error, r.status_code) - - def _postrequest(self, endpoint, payload=None): - """Makes a post request to an endpoint. 
- - If an error occurs, assumes that endpoint returns JSON as: - { 'status_code': XXX, - 'error': 'I failed' } - """ - payload = payload or {} - r = requests.post(SCALE_ENDPOINT + endpoint, json=payload, - headers=self._headers, - auth=(self.api_key, '')) - - if r.status_code == 200: - return r.json() - else: - try: - error = r.json()['error'] - except ValueError: - error = r.text - if r.status_code == 400: - raise ScaleInvalidRequest(error, r.status_code) - else: - raise ScaleException(error, r.status_code) - - def fetch_task(self, task_id): + def get_task(self, task_id: str) -> Task: """Fetches a task. - Returns the associated task. - """ - return Task(self._getrequest('task/%s' % task_id), self) - def cancel_task(self, task_id): - """Cancels a task. + Args: + task_id (str): + Task identifier + Returns: + Task: + """ + endpoint = f"task/{task_id}" + return Task(self.api.get_request(endpoint), self) - Returns the associated task. + def cancel_task(self, task_id: str) -> Task: + """Cancels a task and returns the associated task. Raises a ScaleException if it has already been canceled. + + Args: + task_id (str): + Task id + + Returns: + Task """ - return Task(self._postrequest('task/%s/cancel' % task_id), self) + endpoint = f"task/{task_id}/cancel" + return Task(self.api.post_request(endpoint), self) - def tasks(self, **kwargs): + def tasks(self, **kwargs) -> Tasklist: """Returns a list of your tasks. - Returns up to 100 at a time, to get more, use the next_token param passed back. + Returns up to 100 at a time, to get more, use the + next_token param passed back. + + Valid Args: + start_time (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + end_time (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + status (str): + Status to filter tasks, can be 'completed', 'pending', + or 'canceled' - Note that offset is deprecated. + type (str): + Task type to filter. 
i.e. 'imageannotation' - start/end_time are ISO8601 dates, the time range of tasks to fetch. - status can be 'completed', 'pending', or 'canceled'. - type is the task type. - limit is the max number of results to display per page, - next_token can be use to fetch the next page of tasks. - customer_review_status can be 'pending', 'fixed', 'accepted' or 'rejected'. - offset (deprecated) is the number of results to skip (for showing more pages). + project (str): + Project name to filter tasks by + + batch (str): + Batch name to filter tasks by + + customer_review_status (str): + Audit status of task, can be 'pending', 'fixed', + 'accepted' or 'rejected'. + + unique_id (List[str] | str): + The unique_id of a task. + + completed_after (str): + The minimum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + completed_before (str): + The maximum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_after (str): + The minimum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_before (str): + The maximum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_after (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + tags (List[str] | str): + The tags of a task; multiple tags can be + specified as a list. 
+ + limit (int): + Determines the page size (1-100) + + next_token (str): + Can be used to fetch the next page of tasks """ - allowed_kwargs = {'start_time', 'end_time', 'status', 'type', 'project', - 'batch', 'limit', 'offset', 'completed_before', 'completed_after', - 'next_token', 'customer_review_status', 'updated_before', 'updated_after', - 'tags', 'unique_id'} + allowed_kwargs = { + "start_time", + "end_time", + "status", + "type", + "project", + "batch", + "limit", + "completed_before", + "completed_after", + "next_token", + "customer_review_status", + "tags", + "updated_before", + "updated_after", + "unique_id", + } + for key in kwargs: if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.tasks()' - % key, None) - response = self._getrequest('tasks', params=kwargs) - docs = [Task(json, self) for json in response['docs']] - return Tasklist(docs, response['total'], response['limit'], - response['offset'], response['has_more'], response.get('next_token')) - - def create_task(self, task_type, **kwargs): - endpoint = 'task/' + task_type - taskdata = self._postrequest(endpoint, payload=kwargs) + raise ScaleInvalidRequest( + f"Illegal parameter {key} for ScaleClient.tasks()" + ) + + response = self.api.get_request("tasks", params=kwargs) + + docs = [Task(json, self) for json in response["docs"]] + return Tasklist( + docs, + response["total"], + response["limit"], + response["offset"], + response["has_more"], + response.get("next_token"), + ) + + def get_tasks( + self, + project_name: str, + batch_name: str = None, + task_type: TaskType = None, + status: TaskStatus = None, + review_status: Union[List[TaskReviewStatus], TaskReviewStatus] = None, + unique_id: Union[List[str], str] = None, + completed_after: str = None, + completed_before: str = None, + updated_after: str = None, + updated_before: str = None, + created_after: str = None, + created_before: str = None, + tags: Union[List[str], str] = None, + ) -> Generator[Task, 
None, None]: + """Retrieve all tasks as a `generator` method, with the + given parameters. This method handles pagination of + the tasks() method. + + In order to retrieve results as a list, please use: + `task_list = list(get_tasks(...))` + + Args: + project_name (str): + Project Name + + batch_name (str, optional): + Batch Name + + task_type (TaskType, optional): + Task type to filter i.e. `TaskType.TextCollection` + + status (TaskStatus, optional): + Task status i.e. `TaskStatus.Completed` + + review_status (List[TaskReviewStatus] | TaskReviewStatus): + The status of the audit result of the task. + Input can be a single element or a list of + TaskReviewStatus. i.e. `TaskReviewStatus.Accepted` to + filter the tasks that you accepted after audit. + + unique_id (List[str] | str, optional): + The unique_id of a task. Multiple unique IDs can be + specified at the same time as a list. + + completed_after (str, optional): + The minimum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + completed_before (str, optional): + The maximum value of `completed_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_after (str, optional): + The minimum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + updated_before (str, optional): + The maximum value of `updated_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_after (str, optional): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str, optional): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + tags (List[str] | str, optional): + The tags of a task; multiple tags can be + specified as a list. + + Yields: + Generator[Task]: + Yields Task objects, can be iterated. 
+ """ + + next_token = None + has_more = True + + while has_more: + tasks_args = { + "next_token": next_token, + "start_time": created_after, + "end_time": created_before, + "project": project_name, + "batch": batch_name, + "completed_before": completed_before, + "completed_after": completed_after, + "tags": tags, + "updated_before": updated_before, + "updated_after": updated_after, + "unique_id": unique_id, + } + + if status: + tasks_args["status"] = status.value + if task_type: + tasks_args["type"] = task_type.value + if review_status: + if isinstance(review_status, List): + value = ",".join(map(lambda x: x.value, review_status)) + else: + value = review_status.value + + tasks_args["customer_review_status"] = value + + tasks = self.tasks(**tasks_args) + for task in tasks.docs: + yield task + + next_token = tasks.next_token + has_more = tasks.has_more + + def create_task(self, task_type: TaskType, **kwargs) -> Task: + """This method can be used for any Scale supported task type. + Parameters may differ based on the given task_type. + https://github.com/scaleapi/scaleapi-python-client#create-task + + Args: + task_type (TaskType): + Task type to be created + i.e. `TaskType.ImageAnnotation` + **kwargs: + Passing in the applicable values into the function + definition. The applicable fields and further + information for each task type can be found in + Scale's API documentation. + https://docs.scale.com/reference + + Returns: + Task: + Returns created task. + """ + endpoint = f"task/{task_type.value}" + taskdata = self.api.post_request(endpoint, body=kwargs) return Task(taskdata, self) - def create_batch(self, project, batch_name, callback): + def create_batch(self, project: str, batch_name: str, callback: str = "") -> Batch: + """Create a new Batch within a project. 
+ https://docs.scale.com/reference#batch-creation + + Args: + project (str): + Project name to create batch in + batch_name (str): + Batch name + callback (str, optional): + Email to notify, or URL to POST to + when a batch is complete. + + Returns: + Batch: Created batch object + """ + endpoint = "batches" payload = dict(project=project, name=batch_name, callback=callback) - batchdata = self._postrequest('batches', payload) + batchdata = self.api.post_request(endpoint, body=payload) return Batch(batchdata, self) - def finalize_batch(self, batch_name): - batchdata = self._postrequest('batches/%s/finalize' % quote_string(batch_name)) + def finalize_batch(self, batch_name: str) -> Batch: + """Finalizes a batch so its tasks can be worked on. + https://docs.scale.com/reference#batch-finalization + + Args: + batch_name (str): + Batch name + + Returns: + Batch + """ + endpoint = f"batches/{Api.quote_string(batch_name)}/finalize" + batchdata = self.api.post_request(endpoint) return Batch(batchdata, self) - def batch_status(self, batch_name): - status_data = self._getrequest('batches/%s/status' % quote_string(batch_name)) + def batch_status(self, batch_name: str) -> Dict: + """Returns the status of a batch with the counts of + its tasks grouped by task status. + https://docs.scale.com/reference#batch-status + + Args: + batch_name (str): + Batch name + + Returns: + Dict { + status: Batch status + pending (optional): # of tasks in pending stage + error (optional): # of tasks in error stage + completed (optional): # of tasks in completed stage + canceled (optional): # of tasks in canceled stage + } + """ + endpoint = f"batches/{Api.quote_string(batch_name)}/status" + status_data = self.api.get_request(endpoint) return status_data - def get_batch(self, batch_name): - batchdata = self._getrequest('batches/%s' % quote_string(batch_name)) + def get_batch(self, batch_name: str) -> Batch: + """Returns the details of a batch with the given name. 
+ https://docs.scale.com/reference#batch-retrieval + + Args: + batch_name (str): + Batch name + + Returns: + Batch + """ + endpoint = f"batches/{Api.quote_string(batch_name)}" + batchdata = self.api.get_request(endpoint) return Batch(batchdata, self) - def list_batches(self, **kwargs): - allowed_kwargs = {'start_time', 'end_time', 'status', 'project', - 'limit', 'offset', } + def batches(self, **kwargs) -> Batchlist: + """This is a paged endpoint for all of your batches. + Pagination is based off limit and offset parameters, + which determine the page size and how many results to skip. + Returns up to 100 batches at a time (limit). + https://docs.scale.com/reference#batch-list + + Valid Args: + start_time (str): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + end_time (str): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + status (str): + Status to filter batches by + + project (str): + Project name to filter batches by + + limit (int): + Determines the page size (1-100) + + offset (int): + How many results to skip + + Returns: + Batchlist: + Paginated result. Batchlist.docs provides access + to batches list. Batchlist.limit and Batchlist.offset + are helpers for pagination. 
+ """ + allowed_kwargs = { + "start_time", + "end_time", + "status", + "project", + "limit", + "offset", + } + for key in kwargs: if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.list_batches()' - % key, None) - response = self._getrequest('batches', params=kwargs) - docs = [Batch(doc, self) for doc in response['docs']] + raise ScaleInvalidRequest( + f"Illegal parameter {key} for ScaleClient.batches()" + ) + endpoint = "batches" + response = self.api.get_request(endpoint, params=kwargs) + docs = [Batch(doc, self) for doc in response["docs"]] + return Batchlist( - docs, response['totalDocs'], response['limit'], response['has_more'], response.get( - 'next_token'), + docs, + response["totalDocs"], + response["limit"], + response["offset"], + response["has_more"], ) - def create_project(self, project_name, type, params): - payload = dict(type=type, name=project_name, params=params) - projectdata = self._postrequest('projects', payload) - return Project(projectdata, self) + def get_batches( + self, + project_name: str = None, + batch_status: BatchStatus = None, + created_after: str = None, + created_before: str = None, + ) -> Generator[Batch, None, None]: + """`Generator` method to yield all batches with the given + parameters. + + In order to retrieve results as a list, please use: + `batches_list = list(get_batches(...))` + + Args: + project_name (str): + Project Name to filter batches + + batch_status (BatchStatus, optional): + i.e. `BatchStatus.Completed` + + created_after (str, optional): + The minimum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + created_before (str, optional): + The maximum value of `created_at` in UTC timezone + ISO format: 'YYYY-MM-DD HH:MM:SS.mmmmmm' + + Yields: + Generator[Batch]: + Yields Batch, can be iterated. 
+ """ - def get_project(self, project_name): - projectdata = self._getrequest('projects/%s' % quote_string(project_name)) + has_more = True + offset = 0 + + while has_more: + batches_args = { + "start_time": created_after, + "end_time": created_before, + "project": project_name, + "offset": offset, + } + + if batch_status: + batches_args["status"] = batch_status.value + + batches = self.batches(**batches_args) + for batch in batches.docs: + yield batch + offset += batches.limit + has_more = batches.has_more + + def create_project( + self, project_name: str, task_type: TaskType, params: Dict = None + ) -> Project: + """Creates a new project. + https://docs.scale.com/reference#project-creation + + Args: + project_name (str): + Project name + + task_type (TaskType): + Task Type i.e. `TaskType.ImageAnnotation` + + params (Dict): + Project parameters to be specified. + i.e. `{'instruction':'Please label the kittens'}` + + Returns: + Project: Created project object + """ + endpoint = "projects" + payload = dict(type=task_type.value, name=project_name, params=params) + projectdata = self.api.post_request(endpoint, body=payload) return Project(projectdata, self) - def projects(self): - response = self._getrequest('projects') - return response + def get_project(self, project_name: str) -> Project: + """Retrieves a single project with the given name. 
+ https://docs.scale.com/reference#project-retrieval - def update_project(self, project_name, **kwargs): - allowed_kwargs = {'patch', 'instruction'} - for key in kwargs: - if key not in allowed_kwargs: - raise ScaleInvalidRequest('Illegal parameter %s for ScaleClient.update_project()' - % key, None) - projectdata = self._postrequest('projects/%s/setParams' % quote_string(project_name), payload=kwargs) - return projectdata - -def _generate_useragent(extension=None): - try: - python_version = platform.python_version() - os_platform = platform.platform() - - user_agent = " ".join( - filter( - None, - [ - "{}/{}".format(__name__, __version__), - "Python/{}".format(python_version), - "OS/{}".format(os_platform), - extension, - ], - ) - ) - return user_agent + Args: + project_name (str): + Project name + + Returns: + Project + """ + endpoint = f"projects/{Api.quote_string(project_name)}" + projectdata = self.api.get_request(endpoint) + return Project(projectdata, self) - except Exception: - return "scaleapi-python-client" + def get_projects(self) -> List[Project]: + """Returns all projects. + Refer to Projects API Reference: + https://docs.scale.com/reference#list-all-projects + Same as `projects()` method. -def quote_string(text): - """`quote_string('a bc/def')` -> `a%20bc%2Fdef` - Project and Batch names can be a part of URL, which causes an error - in case of a special character used. Quotation assures - the right object to be retrieved from API. - """ - return urllib.parse.quote(text, safe="") + Returns: + List[Project] + """ + return self.projects() -def _AddTaskTypeCreator(task_type): - def create_task_wrapper(self, **kwargs): - return self.create_task(task_type, **kwargs) - setattr(ScaleClient, 'create_' + task_type + '_task', create_task_wrapper) + def projects(self) -> List[Project]: + """Returns all projects. 
+ Refer to Projects API Reference: + https://docs.scale.com/reference#list-all-projects + Returns: + List[Project] + """ + endpoint = "projects" + project_list = self.api.get_request(endpoint) + return [Project(project, self) for project in project_list] + + def update_project(self, project_name: str, **kwargs) -> Project: + """You can set parameters on a project. Project-level-parameters + will be set on future tasks created under this project if they + are not set in the task request. Any parameters specified in + the task request will override any project parameter. + https://docs.scale.com/reference#project-update-parameters + + Args: + project_name (str): + Project's name + + **kwargs: + Project parameters to be set. + + Returns: + Project + """ -for taskType in TASK_TYPES: - _AddTaskTypeCreator(taskType) + endpoint = f"projects/{Api.quote_string(project_name)}/setParams" + projectdata = self.api.post_request(endpoint, body=kwargs) + return Project(projectdata, self) diff --git a/scaleapi/_version.py b/scaleapi/_version.py index 92192ee..9c9b48f 100644 --- a/scaleapi/_version.py +++ b/scaleapi/_version.py @@ -1 +1,2 @@ -__version__ = "1.0.4" +__version__ = "2.0.0" +__package_name__ = "scaleapi" diff --git a/scaleapi/api.py b/scaleapi/api.py new file mode 100644 index 0000000..cdbe693 --- /dev/null +++ b/scaleapi/api.py @@ -0,0 +1,154 @@ +import platform +import urllib.parse + +import requests +from requests.adapters import HTTPAdapter, Response, Retry + +from ._version import __package_name__, __version__ +from .exceptions import ExceptionMap, ScaleException + +SCALE_ENDPOINT = "https://api.scale.com/v1" + +# Parameters for HTTP retry +HTTP_TOTAL_RETRIES = 3 # Number of total retries +HTTP_RETRY_BACKOFF_FACTOR = 2 # Wait 1, 2, 4 seconds between retries +HTTP_STATUS_FORCE_LIST = [429, 504] # Status codes to force retry +HTTP_RETRY_ALLOWED_METHODS = frozenset({"GET", "POST"}) + + +class Api: + """Internal Api reference for handling http operations""" + + def 
__init__(self, api_key, user_agent_extension=None): + if api_key == "" or api_key is None: + raise ScaleException("Please provide a valid API Key.") + + self.api_key = api_key + + self._auth = (self.api_key, "") + self._headers = { + "Content-Type": "application/json", + "User-Agent": self._generate_useragent(user_agent_extension), + } + + @staticmethod + def _http_request( + method, url, headers=None, auth=None, params=None, body=None + ) -> Response: + + https = requests.Session() + retry_strategy = Retry( + total=HTTP_TOTAL_RETRIES, + backoff_factor=HTTP_RETRY_BACKOFF_FACTOR, + status_forcelist=HTTP_STATUS_FORCE_LIST, + allowed_methods=HTTP_RETRY_ALLOWED_METHODS, + raise_on_status=False, + ) + + adapter = HTTPAdapter(max_retries=retry_strategy) + https.mount("https://", adapter) + + try: + params = params or {} + body = body or {} + + res = https.request( + method=method, + url=url, + headers=headers, + auth=auth, + params=params, + json=body, + ) + + return res + except Exception as err: + raise ScaleException(err) from err + + @staticmethod + def _raise_on_respose(res: Response): + + message = "" + try: + message = res.json().get("error", res.text) + except ValueError: + message = res.text + + try: + exception = ExceptionMap[res.status_code] + raise exception(message) + except KeyError as err: + raise ScaleException(message) from err + + def _api_request( + self, method, endpoint, headers=None, auth=None, params=None, body=None + ): + """Generic HTTP request method with error handling.""" + + url = f"{SCALE_ENDPOINT}/{endpoint}" + + res = self._http_request(method, url, headers, auth, params, body) + + json = None + if res.status_code == 200: + json = res.json() + else: + self._raise_on_respose(res) + + return json + + def get_request(self, endpoint, params=None): + """Generic GET Request Wrapper""" + return self._api_request( + "GET", endpoint, headers=self._headers, auth=self._auth, params=params + ) + + def post_request(self, endpoint, body=None): + 
"""Generic POST Request Wrapper""" + return self._api_request( + "POST", endpoint, headers=self._headers, auth=self._auth, body=body + ) + + @staticmethod + def _generate_useragent(extension: str = None) -> str: + """Generates UserAgent parameter with module, Python + and OS details + + Args: + extension (str, optional): Option to extend UserAgent + with source system + + Returns: + str: Generated UserAgent parameter with platform versions + """ + python_version = platform.python_version() + os_platform = platform.platform() + + user_agent = " ".join( + filter( + None, + [ + f"{__package_name__}/{__version__}", + f"Python/{python_version}", + f"OS/{os_platform}", + extension, + ], + ) + ) + return user_agent + + @staticmethod + def quote_string(text: str) -> str: + """Project and Batch names can be a part of URL, which causes + an error in case of a special character used. + Quotation assures the right object to be retrieved from API. + + `quote_string('a bc/def')` -> `a%20bc%2Fdef` + + Args: + text (str): Input text to be quoted + + Returns: + str: Quoted text in return + """ + return urllib.parse.quote(text, safe="") diff --git a/scaleapi/batches.py b/scaleapi/batches.py index d5ca697..d0cb73a 100644 --- a/scaleapi/batches.py +++ b/scaleapi/batches.py @@ -1,33 +1,56 @@ -class Batch(object): - def __init__(self, param_dict, client): - self.param_dict = param_dict - self.name = param_dict['name'] - self.status = param_dict["status"] - - self.pending = None - self.completed = None - self.error = None - self.canceled = None - self.client = client - self.get_status() +from enum import Enum + + +class BatchStatus(Enum): + """Status of Batches""" + + Staging = "staging" + InProgress = "in_progress" + Completed = "completed" + + +class Batch: + """Batch class, contains Batch information""" + + def __init__(self, json, client): + self._json = json + self.name = json["name"] + self.status = json["status"] + self.project = json["project"] + self.created_at = 
json["created_at"] + self.project = json["project"] + + self.tasks_pending = None + self.tasks_completed = None + self.tasks_error = None + self.tasks_canceled = None + self._client = client def __hash__(self): return hash(self.name) def __str__(self): - return 'Batch(name=%s)' % self.name + return f"Batch(name={self.name})" def __repr__(self): - return 'Batch(%s)' % self.param_dict + return f"Batch({self._json})" + + def as_dict(self): + """Returns all attributes as a dictionary""" + return self._json def finalize(self): - res = self.client.finalize_batch(self.name) + """Finalizes the batch""" + res = self._client.finalize_batch(self.name) self.status = res.status return res def get_status(self): - res = self.client.batch_status(self.name) + """Returns status of the batch and + updates tasks_... parameters + """ + res = self._client.batch_status(self.name) self.status = res["status"] for stat in ["pending", "completed", "error", "canceled"]: - setattr(self, stat, res.get(stat, 0)) + setattr(self, "tasks_" + stat, res.get(stat, 0)) return res diff --git a/scaleapi/exceptions.py b/scaleapi/exceptions.py new file mode 100644 index 0000000..a603d2a --- /dev/null +++ b/scaleapi/exceptions.py @@ -0,0 +1,87 @@ +class ScaleException(Exception): + """Generic ScaleException class""" + + code = None + + def __init__(self, message, errcode=None): + if not message: + message = type(self).__name__ + self.message = message + + if errcode: + self.code = errcode + + if self.code: + super().__init__(f" {message}") + else: + super().__init__(f" {message}") + + +class ScaleInvalidRequest(ScaleException): + """400 - Bad Request -- The request was unacceptable, + often due to missing a required parameter. + """ + + code = 400 + + +class ScaleUnauthorized(ScaleException): + """401 - Unauthorized -- No valid API key provided.""" + + code = 401 + + +class ScaleNotEnabled(ScaleException): + """402 - Not enabled -- Please contact sales@scaleapi.com before + creating this type of task. 
+ """ + + code = 402 + + +class ScaleResourceNotFound(ScaleException): + """404 - Not Found -- The requested resource doesn't exist.""" + + code = 404 + + +class ScaleDuplicateTask(ScaleException): + """409 - Conflict -- The provided idempotency key or unique_id is + already in use for a different request. + """ + + code = 409 + + +class ScaleTooManyRequests(ScaleException): + """429 - Too Many Requests -- Too many requests hit the API + too quickly. + """ + + code = 429 + + +class ScaleInternalError(ScaleException): + """500 - Internal Server Error -- We had a problem with our server. + Try again later. + """ + + code = 500 + + +class ScaleTimeoutError(ScaleException): + """504 - Server Timeout Error -- Try again later.""" + + code = 504 + + +ExceptionMap = { + ScaleInvalidRequest.code: ScaleInvalidRequest, + ScaleUnauthorized.code: ScaleUnauthorized, + ScaleNotEnabled.code: ScaleNotEnabled, + ScaleResourceNotFound.code: ScaleResourceNotFound, + ScaleDuplicateTask.code: ScaleDuplicateTask, + ScaleTooManyRequests.code: ScaleTooManyRequests, + ScaleInternalError.code: ScaleInternalError, + ScaleTimeoutError.code: ScaleTimeoutError, +} diff --git a/scaleapi/projects.py b/scaleapi/projects.py index b9dddd7..8a0536f 100644 --- a/scaleapi/projects.py +++ b/scaleapi/projects.py @@ -1,14 +1,30 @@ -class Project(object): - def __init__(self, param_dict, client): - self.param_dict = param_dict - self.name = param_dict['name'] - self.client = client +class Project: + """Project class, containing Project information.""" + + def __init__(self, json, client): + self._json = json + self.name = json["name"] + self.type = json["type"] + self._client = client + self.params = None + self.version = None + self.instruction = None + + if len(json["param_history"]): + self.params = json["param_history"][-1] + self.version = self.params["version"] + if "instruction" in self.params: + self.instruction = self.params["instruction"] def __hash__(self): return hash(self.name) def 
__str__(self): - return 'Project(name=%s)' % self.name + return f"Project(name={self.name})" def __repr__(self): - return 'Project(%s)' % self.param_dict + return f"Project({self._json})" + + def as_dict(self): + """Returns all attributes as a dictionary""" + return self._json diff --git a/scaleapi/tasks.py b/scaleapi/tasks.py index a94579a..7292386 100644 --- a/scaleapi/tasks.py +++ b/scaleapi/tasks.py @@ -1,30 +1,88 @@ -class Task(object): +from enum import Enum + + +class TaskType(Enum): + """Task Type List""" + + Annotation = "annotation" + Categorization = "categorization" + Comparison = "comparison" + CuboidAnnotation = "cuboidannotation" + DataCollection = "datacollection" + DocumentModel = "documentmodel" + DocumentTranscription = "documenttranscription" + ImageAnnotation = "imageannotation" + LaneAnnotation = "laneannotation" + LidarAnnotation = "lidarannotation" + LidarLinking = "lidarlinking" + LidarSegmentation = "lidarsegmentation" + LidarTopdown = "lidartopdown" + LineAnnotation = "lineannotation" + NamedEntityRecognition = "namedentityrecognition" + PointAnnotation = "pointannotation" + PolygonAnnotation = "polygonannotation" + SegmentAnnotation = "segmentannotation" + Transcription = "transcription" + TextCollection = "textcollection" + VideoAnnotation = "videoannotation" + VideoBoxAnnotation = "videoboxannotation" + VideoPlaybackAnnotation = "videoplaybackannotation" + VideoCuboidAnnotation = "videocuboidannotation" + + +class TaskReviewStatus(Enum): + """Customer Audit Status of Task""" + + Accepted = "accepted" + Fixed = "fixed" + Commented = "commented" + Rejected = "rejected" + + +class TaskStatus(Enum): + """Status of Task""" + + Pending = "pending" + Completed = "completed" + Canceled = "canceled" + + +class Task: """Task class, containing task information.""" - def __init__(self, param_dict, client): - self.client = client - self.param_dict = param_dict - self.id = param_dict['task_id'] + def __init__(self, json, client): + self._client = 
client + self._json = json + self.id = json["task_id"] def __getattr__(self, name): - if name in self.param_dict: - return self.param_dict[name] - if name in self.params: - return self.params[name] - raise AttributeError("'%s' object has no attribute %s" - % (type(self).__name__, name)) + if name in self._json: + return self._json[name] + raise AttributeError(f"'{type(self).__name__}' object has no attribute {name}") def __hash__(self): return hash(self.id) def __str__(self): - return 'Task(id=%s)' % self.id + return f"Task(id={self.id})" def __repr__(self): - return 'Task(%s)' % self.param_dict + return f"Task({self._json})" + + def as_dict(self): + """Returns object details as a dictionary + + `Task.as_dict()['params']` + + Returns: + Dict with object content + """ + return self._json def refresh(self): - self.param_dict = self.client._getrequest('task/%s' % self.id) + """Refreshes the task details.""" + self._json = self._client.get_task(self.id).as_dict() def cancel(self): - self.client.cancel_task(self.id) + """Cancels the task""" + self._client.cancel_task(self.id) diff --git a/setup.cfg b/setup.cfg index 5aef279..69b2789 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,12 @@ [metadata] -description-file = README.rst +long_description = file: README.rst +long_description_content_type = text/x-rst + +[flake8] +# Recommend matching the black line length (default 88) +max-line-length = 88 +max-doc-length = 72 +max-complexity = 10 +extend-ignore = + # See https://github.com/PyCQA/pycodestyle/issues/373 + E203, diff --git a/setup.py b/setup.py index 5a8621a..9c5799e 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -import sys -import warnings import os.path try: @@ -7,62 +5,55 @@ except ImportError: from distutils.core import setup -install_requires = ['requests>=2.4.2'] +install_requires = ["requests>=2.25.0", "urllib3>=1.26.0"] -if sys.version_info < (3, 4, 0): - install_requires.append('enum34') - -if sys.version_info < (2, 7, 9): - warnings.warn( - 
'Users have reported issues with SNI / SSL by using Scale on ' - 'versions of Python older than 2.7.9. If at all possible, you should ' - 'upgrade your version of Python. ' - 'If you have any questions, please file an issue on Github or ' - 'contact us at support@scale.com.', - DeprecationWarning) - install_requires.append('pyOpenSSL') - install_requires.append('ndg-httpsclient') - install_requires.append('pyasn1') - install_requires.append('idna') - install_requires.append('requests[security]') def read(rel_path): + """Read lines from given file""" here = os.path.abspath(os.path.dirname(__file__)) - with open(os.path.join(here, rel_path), 'r') as fp: + with open(os.path.join(here, rel_path), "r") as fp: return fp.read() + def get_version(rel_path): + """Read __version__ from given file""" for line in read(rel_path).splitlines(): - if line.startswith('__version__'): + if line.startswith("__version__"): delim = '"' if '"' in line else "'" return line.split(delim)[1] - raise RuntimeError("Unable to find a valid __version__ string in %s." 
% rel_path) + raise RuntimeError(f"Unable to find a valid __version__ string in {rel_path}.") + setup( - name='scaleapi', - packages=['scaleapi'], + name="scaleapi", + packages=["scaleapi"], version=get_version("scaleapi/_version.py"), - description='The official Python client library for Scale AI, the Data Platform for AI', - author='Scale AI', - author_email='support@scale.com', - url='https://github.com/scaleapi/scaleapi-python-client', + description="The official Python client library for Scale AI, " + "the Data Platform for AI", + author="Scale AI", + author_email="support@scale.com", + url="https://github.com/scaleapi/scaleapi-python-client", keywords=[ - 'scale', - 'scaleapi', - 'tasks', - 'categorization', - 'labeling', - 'annotation', + "scale", + "scaleapi", + "tasks", + "categorization", + "labeling", + "annotation", ], install_requires=install_requires, - classifiers=['Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 3.7', - 'Programming Language :: Python :: 3.8', - 'Programming Language :: Python :: 3.9', - 'Programming Language :: Python :: 3.10', - 'License :: OSI Approved :: MIT License', - 'Intended Audience :: Developers', - 'Topic :: Software Development :: Libraries'] + python_requires=">=3.6", + classifiers=[ + "Development Status :: 5 - Production/Stable", + "Natural Language :: English", + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "License :: OSI Approved :: MIT License", + "Intended Audience :: Developers", + "Topic :: Software Development :: Libraries", + ], ) diff --git a/tests/test_client.py b/tests/test_client.py index 05222b1..c2af8f7 100644 --- a/tests/test_client.py +++ b/tests/test_client.py @@ -1,223 +1,290 @@ -# 
coding: utf-8 +# pylint: disable=missing-function-docstring -import pytest -import scaleapi +import os import time +import uuid from datetime import datetime -from random import randint -import os + +import pytest + +import scaleapi +from scaleapi.exceptions import ( + ScaleDuplicateTask, + ScaleInvalidRequest, + ScaleResourceNotFound, + ScaleUnauthorized, +) +from scaleapi.tasks import TaskType + +TEST_PROJECT_NAME = "scaleapi-python-sdk" try: - test_api_key = os.environ['SCALE_TEST_API_KEY'] - client = scaleapi.ScaleClient(test_api_key, 'pytest') -except KeyError: - raise Exception("Please set the environment variable SCALE_TEST_API_KEY to run tests.") - -def make_a_task(): - return client.create_imageannotation_task( - callback_url = "http://www.example.com/callback", - instruction = "Draw a box around each baby cow and big cow.", - attachment_type = "image", - attachment = "http://i.imgur.com/v4cBreD.jpg", - geometries = { + test_api_key = os.environ["SCALE_TEST_API_KEY"] + client = scaleapi.ScaleClient(test_api_key, "pytest") +except KeyError as err: + raise Exception( + "Please set the environment variable SCALE_TEST_API_KEY to run tests." 
+ ) from err + +try: + project = client.get_project(TEST_PROJECT_NAME) +except ScaleResourceNotFound: + client.create_project( + project_name=TEST_PROJECT_NAME, task_type=TaskType.ImageAnnotation + ) + + +def test_invalidkey_fail(): + client_fail = scaleapi.ScaleClient("dummy_api_key", "pytest") + with pytest.raises(ScaleUnauthorized): + client_fail.batches(limit=1) + + +def make_a_task(unique_id: str = None, batch: str = None): + + args = { + "callback_url": "http://www.example.com/callback", + "instruction": "Draw a box around each baby cow and big cow.", + "attachment_type": "image", + "attachment": "http://i.imgur.com/v4cBreD.jpg", + "geometries": { "box": { - "objects_to_annotate": ["Baby Cow", "Big Cow"], - "min_height": 10, - "min_width": 10 + "objects_to_annotate": ["Baby Cow", "Big Cow"], + "min_height": 10, + "min_width": 10, } - } - ) + }, + } + if unique_id: + args["unique_id"] = unique_id + if batch: + args["batch"] = batch + + return client.create_task(TaskType.ImageAnnotation, **args) + + +def test_uniquekey_fail(): + unique_key = str(uuid.uuid4()) + make_a_task(unique_key) + with pytest.raises(ScaleDuplicateTask): + make_a_task(unique_key) + def test_categorize_ok(): - task = client.create_categorization_task( - callback_url='http://www.example.com/callback', - instruction='Is this company public or private?', - attachment_type='website', + client.create_task( + TaskType.Categorization, + callback_url="http://www.example.com/callback", + instruction="Is this company public or private?", + attachment_type="website", force=True, - attachment='http://www.google.com/', - categories=['public', 'private']) + attachment="http://www.google.com/", + categories=["public", "private"], + ) + def test_categorize_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_categorization_task( - callback_url='http://www.example.com/callback', - categories=['public', 'private']) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + 
TaskType.Categorization, + callback_url="http://www.example.com/callback", + categories=["public", "private"], + ) + def test_transcription_ok(): - task = client.create_transcription_task( - callback_url='http://www.example.com/callback', - instruction='Transcribe the given fields. Then for each news item on the page, transcribe the information for the row.', - attachment_type='website', - attachment='http://www.google.com/', - fields={ - 'title': 'Title of Webpage', - 'top_result': 'Title of the top result' - }, + client.create_task( + TaskType.Transcription, + callback_url="http://www.example.com/callback", + instruction="Transcribe the given fields. Then for each news item on the page, " + "transcribe the information for the row.", + attachment_type="website", + attachment="http://www.google.com/", + fields={"title": "Title of Webpage", "top_result": "Title of the top result"}, repeatable_fields={ - 'username': 'Username of submitter', - 'comment_count': 'Number of comments' - }) + "username": "Username of submitter", + "comment_count": "Number of comments", + }, + ) + def test_transcription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_transcription_task( - callback_url='http://www.example.com/callback', - attachment_type='website') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.Transcription, + callback_url="http://www.example.com/callback", + attachment_type="website", + ) + def test_imageannotation_ok(): - client.create_imageannotation_task( - callback_url = "http://www.example.com/callback", - instruction = "Draw a box around each baby cow and big cow.", - attachment_type = "image", - attachment = "http://i.imgur.com/v4cBreD.jpg", - geometries = { + client.create_task( + TaskType.ImageAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each baby cow and big cow.", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + geometries={ 
"box": { - "objects_to_annotate": ["Baby Cow", "Big Cow"], - "min_height": 10, - "min_width": 10 + "objects_to_annotate": ["Baby Cow", "Big Cow"], + "min_height": 10, + "min_width": 10, } - } + }, ) + def test_imageannotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_imageannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.ImageAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby cow** and **big cow**", + attachment_type="image", + ) + def test_documenttranscription_ok(): - client.create_documenttranscription_task( - callback_url= 'http://www.example.com/callback', - instruction= 'Please transcribe this receipt.', - attachment= 'http://document.scale.com/receipt-20200519.jpg', - features= [ - { - 'type': "block", - 'label': "barcode", - } - ] + client.create_task( + TaskType.DocumentTranscription, + callback_url="http://www.example.com/callback", + instruction="Please transcribe this receipt.", + attachment="http://document.scale.com/receipt-20200519.jpg", + features=[{"type": "block", "label": "barcode"}], ) + def test_documenttranscription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_imageannotation_task( - callback_url='http://www.example.com/callback', - instruction='Please transcribe this receipt.', - ) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.DocumentTranscription, + callback_url="http://www.example.com/callback", + instruction="Please transcribe this receipt.", + ) + def test_annotation_ok(): - task = client.create_annotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - 
min_width='30', - min_height='30', - objects_to_annotate=['baby cow', 'big cow'], - with_labels=True) + client.create_task( + TaskType.Annotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby cow** and **big cow**", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + min_width="30", + min_height="30", + objects_to_annotate=["baby cow", "big cow"], + with_labels=True, + ) + def test_annotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_annotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a box around each **baby cow** and **big cow**', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.Annotation, + callback_url="http://www.example.com/callback", + instruction="Draw a box around each **baby cow** and **big cow**", + attachment_type="image", + ) + def test_polygonannotation_ok(): - task = client.create_polygonannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - objects_to_annotate=['big cow'], - with_labels=True) + client.create_task( + TaskType.PolygonAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + objects_to_annotate=["big cow"], + with_labels=True, + ) + def test_polygonannotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_polygonannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.PolygonAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around 
the big cow", + attachment_type="image", + ) + def test_lineannotation_ok(): - task = client.create_lineannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image', - attachment='http://i.imgur.com/v4cBreD.jpg', - objects_to_annotate=['big cow'], - with_labels=True) + client.create_task( + TaskType.LineAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + attachment="http://i.imgur.com/v4cBreD.jpg", + objects_to_annotate=["big cow"], + with_labels=True, + ) + def test_lineannotation_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_lineannotation_task( - callback_url='http://www.example.com/callback', - instruction='Draw a tight shape around the big cow', - attachment_type='image') + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.LineAnnotation, + callback_url="http://www.example.com/callback", + instruction="Draw a tight shape around the big cow", + attachment_type="image", + ) + def test_datacollection_ok(): - task = client.create_datacollection_task( - callback_url='http://www.example.com/callback', - instruction='Find the URL for the hiring page for the company with attached website.', - attachment_type='website', - attachment='http://www.google.com/', - fields={ 'hiring_page': 'Hiring Page URL' }) + client.create_task( + TaskType.DataCollection, + callback_url="http://www.example.com/callback", + instruction="Find the URL for the hiring page for the company" + " with attached website.", + attachment_type="website", + attachment="http://www.google.com/", + fields={"hiring_page": "Hiring Page URL"}, + ) + def test_datacollection_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_datacollection_task( - callback_url='http://www.example.com/callback', - attachment_type='website') - -def 
test_audiotranscription_ok(): - task = client.create_audiotranscription_task( - callback_url='http://www.example.com/callback', - attachment_type='audio', - instruction='Listen to the audio file and transcript.', - attachment='https://storage.googleapis.com/deepmind-media/pixie/knowing-what-to-say/second-list/speaker-3.wav', - verbatim=False, - phrases=['avocado', 'stone'] - ) + with pytest.raises(ScaleInvalidRequest): + client.create_task( + TaskType.DataCollection, + callback_url="http://www.example.com/callback", + attachment_type="website", + ) -def test_audiotranscription_fail(): - with pytest.raises(scaleapi.ScaleInvalidRequest): - client.create_audiotranscription_task( - callback_url='http://www.example.com/callback', - attachment_type='audio') def test_namedentityrecognition_ok(): - return client.create_namedentityrecognition_task( - callback_url='http://www.example.com/callback', - instruction='Do the objects in these images have the same pattern?', - text='Example text to label with NER tool', - labels=[{ - 'name': 'Label_A', - 'description': 'the first label', - }]) - + return client.create_task( + TaskType.NamedEntityRecognition, + callback_url="http://www.example.com/callback", + instruction="Do the objects in these images have the same pattern?", + text="Example text to label with NER tool", + labels=[{"name": "Label_A", "description": "the first label"}], + ) + + def test_cancel(): task = make_a_task() # raises a scaleexception, because test tasks complete instantly - with pytest.raises(scaleapi.ScaleException): + with pytest.raises(ScaleInvalidRequest): task.cancel() + def test_task_retrieval(): task = make_a_task() - task2 = client.fetch_task(task.id) - assert task2.status == 'completed' + task2 = client.get_task(task.id) + assert task2.status == "completed" assert task2.id == task.id assert task2.callback_url == task.callback_url assert task2.instruction == task.instruction - assert task2.attachment_type == task.attachment_type - assert 
task2.attachment == task.attachment - assert task2.geometries == task.geometries + assert task2.params["attachment_type"] == task.params["attachment_type"] + assert task2.params["attachment"] == task.params["attachment"] + assert task2.params["geometries"] == task.params["geometries"] assert task2.metadata == task.metadata assert task2.type == task.type assert task2.created_at == task.created_at + def test_task_retrieval_time(): - task = make_a_task() + make_a_task() time.sleep(0.5) start_time = datetime.utcnow().isoformat() time.sleep(0.5) @@ -225,49 +292,84 @@ def test_task_retrieval_time(): tasks = client.tasks(start_time=start_time, end_time=end_time) assert tasks.docs == [] + def test_task_retrieval_fail(): - with pytest.raises(scaleapi.ScaleException): - client.fetch_task('fake_id_qwertyuiop') + with pytest.raises(ScaleResourceNotFound): + client.get_task("fake_id_qwertyuiop") + def test_tasks(): tasks = [] - for i in range(3): + for _ in range(3): tasks.append(make_a_task()) task_ids = {task.id for task in tasks} for task in client.tasks(limit=3): assert task.id in task_ids + def test_tasks_invalid(): - with pytest.raises(scaleapi.ScaleException): + with pytest.raises(ScaleInvalidRequest): client.tasks(bogus=0) + def create_a_batch(): return client.create_batch( - callback = "http://www.example.com/callback", - batch_name = "scaleapi-python-sdk-" + str(randint(0, 99999)), - project = "scaleapi-python-sdk" + callback="http://www.example.com/callback", + batch_name=str(uuid.uuid4()), + project=TEST_PROJECT_NAME, ) + +def test_get_tasks(): + batch = create_a_batch() + tasks = [] + for _ in range(3): + tasks.append(make_a_task(batch=batch.name)) + task_ids = {task.id for task in tasks} + for task in client.get_tasks(project_name=TEST_PROJECT_NAME, batch_name=batch.name): + assert task.id in task_ids + + def test_finalize_batch(): batch = create_a_batch() batch = client.finalize_batch(batch.name) - assert batch.status == 'in_progress' batch2 = create_a_batch() 
batch2.finalize() - assert batch2.status == 'in_progress' + def test_get_batch_status(): batch = create_a_batch() client.batch_status(batch.name) - assert batch.status == 'staging' + assert batch.status == "staging" + + batch2 = client.get_batch(batch.name) + batch2.get_status() # Test status update + assert batch2.status == "staging" - batch.finalize() - batch.get_status() # Test status update - assert batch.status == 'in_progress' def test_get_batch(): batch = create_a_batch() batch2 = client.get_batch(batch.name) assert batch.name == batch2.name - assert batch2.status == 'staging' + assert batch2.status == "staging" + + +def test_batches(): + batches = [] + for _ in range(3): + batches.append(create_a_batch()) + batch_names = {batch.name for batch in batches} + + for batch in client.batches(limit=3): + assert batch.name in batch_names + + +def test_get_batches(): + # Get count of all batches + batchlist = client.batches(project=TEST_PROJECT_NAME, limit=1) + total_batches = batchlist.total + + # Download all batches to check total count + all_batches = list(client.get_batches(project_name=TEST_PROJECT_NAME)) + assert total_batches == len(all_batches)