diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 32859c094..3b3101552 100644 --- a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.5.0a1 +current_version = 1.7.0a1 parse = (?P[\d]+) # major version number \.(?P[\d]+) # minor version number \.(?P[\d]+) # patch version number diff --git a/.changes/0.0.0.md b/.changes/0.0.0.md index bed773333..0bec014d9 100644 --- a/.changes/0.0.0.md +++ b/.changes/0.0.0.md @@ -1,5 +1,7 @@ ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) +- [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md) - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md) - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md) diff --git a/.changes/unreleased/Dependencies-20230126-152319.yaml b/.changes/unreleased/Dependencies-20230126-152319.yaml deleted file mode 100644 index 7fe5d0c77..000000000 --- a/.changes/unreleased/Dependencies-20230126-152319.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Dependencies -body: Allow thrift 0.16.0 -time: 2023-01-26T15:23:19.978823-08:00 -custom: - Author: colin-rogers-dbt - Issue: "606" - PR: "605" diff --git a/.changes/unreleased/Dependencies-20230424-230630.yaml b/.changes/unreleased/Dependencies-20230424-230630.yaml new file mode 100644 index 000000000..1f96daad1 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230424-230630.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update tox requirement from ~=3.0 to ~=4.5" +time: 2023-04-24T23:06:30.00000Z +custom: + Author: dependabot[bot] + PR: 741 diff --git a/.changes/unreleased/Dependencies-20230424-230645.yaml b/.changes/unreleased/Dependencies-20230424-230645.yaml new file mode 100644 index 000000000..83e1bb44b --- /dev/null +++ b/.changes/unreleased/Dependencies-20230424-230645.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pyodbc requirement from ~=4.0.30 to ~=4.0.39" +time: 2023-04-24T23:06:45.00000Z +custom: + Author: dependabot[bot] + PR: 742 diff --git a/.changes/unreleased/Dependencies-20230501-231003.yaml b/.changes/unreleased/Dependencies-20230501-231003.yaml new file mode 100644 index 000000000..b3e3a750e --- /dev/null +++ b/.changes/unreleased/Dependencies-20230501-231003.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pre-commit requirement from ~=2.21 to ~=3.3" +time: 2023-05-01T23:10:03.00000Z +custom: + Author: dependabot[bot] + PR: 748 diff --git a/.changes/unreleased/Dependencies-20230501-231035.yaml b/.changes/unreleased/Dependencies-20230501-231035.yaml new file mode 100644 index 000000000..7bbf98202 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230501-231035.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update types-requests requirement from ~=2.28 to ~=2.29" +time: 2023-05-01T23:10:35.00000Z +custom: + Author: dependabot[bot] + PR: 749 diff --git a/.changes/unreleased/Dependencies-20230510-230725.yaml b/.changes/unreleased/Dependencies-20230510-230725.yaml new file mode 100644 index 000000000..dfd04ad3b --- /dev/null +++ b/.changes/unreleased/Dependencies-20230510-230725.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump mypy from 1.2.0 to 1.3.0" +time: 2023-05-10T23:07:25.00000Z +custom: + Author: dependabot[bot] + PR: 768 diff --git 
a/.changes/unreleased/Dependencies-20230803-224622.yaml b/.changes/unreleased/Dependencies-20230803-224622.yaml new file mode 100644 index 000000000..119a08e51 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230803-224622.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update flake8 requirement from ~=6.0 to ~=6.1" +time: 2023-08-03T22:46:22.00000Z +custom: + Author: dependabot[bot] + PR: 849 diff --git a/.changes/unreleased/Dependencies-20230803-224626.yaml b/.changes/unreleased/Dependencies-20230803-224626.yaml new file mode 100644 index 000000000..c8b9ef04a --- /dev/null +++ b/.changes/unreleased/Dependencies-20230803-224626.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pytest-xdist requirement from ~=3.2 to ~=3.3" +time: 2023-08-03T22:46:26.00000Z +custom: + Author: dependabot[bot] + PR: 851 diff --git a/.changes/unreleased/Dependencies-20230803-224629.yaml b/.changes/unreleased/Dependencies-20230803-224629.yaml new file mode 100644 index 000000000..6865c7c74 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230803-224629.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pytest requirement from ~=7.3 to ~=7.4" +time: 2023-08-03T22:46:29.00000Z +custom: + Author: dependabot[bot] + PR: 852 diff --git a/.changes/unreleased/Dependencies-20230804-225232.yaml b/.changes/unreleased/Dependencies-20230804-225232.yaml new file mode 100644 index 000000000..f4a09b6b0 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230804-225232.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pip-tools requirement from ~=6.13 to ~=7.2" +time: 2023-08-04T22:52:32.00000Z +custom: + Author: dependabot[bot] + PR: 856 diff --git a/.changes/unreleased/Dependencies-20230804-225243.yaml b/.changes/unreleased/Dependencies-20230804-225243.yaml new file mode 100644 index 000000000..07b9bdb4e --- /dev/null +++ b/.changes/unreleased/Dependencies-20230804-225243.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update wheel requirement from ~=0.40 to ~=0.41" +time: 2023-08-04T22:52:43.00000Z +custom: + Author: dependabot[bot] + PR: 858 diff --git a/.changes/unreleased/Dependencies-20230807-221033.yaml b/.changes/unreleased/Dependencies-20230807-221033.yaml new file mode 100644 index 000000000..94a261147 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230807-221033.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump mypy from 1.3.0 to 1.4.1" +time: 2023-08-07T22:10:33.00000Z +custom: + Author: dependabot[bot] + PR: 860 diff --git a/.changes/unreleased/Dependencies-20230807-221037.yaml b/.changes/unreleased/Dependencies-20230807-221037.yaml new file mode 100644 index 000000000..daa1e3ba0 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230807-221037.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update tox requirement from ~=4.5 to ~=4.6" +time: 2023-08-07T22:10:37.00000Z +custom: + Author: dependabot[bot] + PR: 861 diff --git a/.changes/unreleased/Dependencies-20230809-043913.yaml b/.changes/unreleased/Dependencies-20230809-043913.yaml new file mode 100644 index 000000000..28432003d --- /dev/null +++ b/.changes/unreleased/Dependencies-20230809-043913.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update pip-tools requirement from ~=7.2 to ~=7.3" +time: 2023-08-09T04:39:13.00000Z +custom: + Author: dependabot[bot] + PR: 863 diff --git a/.changes/unreleased/Dependencies-20230811-221135.yaml b/.changes/unreleased/Dependencies-20230811-221135.yaml new file mode 100644 index 000000000..4fd2e4f54 --- /dev/null +++ 
b/.changes/unreleased/Dependencies-20230811-221135.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update tox requirement from ~=4.6 to ~=4.7" +time: 2023-08-11T22:11:35.00000Z +custom: + Author: dependabot[bot] + PR: 867 diff --git a/.changes/unreleased/Dependencies-20230814-224754.yaml b/.changes/unreleased/Dependencies-20230814-224754.yaml new file mode 100644 index 000000000..4cd4a01d4 --- /dev/null +++ b/.changes/unreleased/Dependencies-20230814-224754.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Update tox requirement from ~=4.7 to ~=4.8" +time: 2023-08-14T22:47:54.00000Z +custom: + Author: dependabot[bot] + PR: 871 diff --git a/.changes/unreleased/Dependencies-20230814-224757.yaml b/.changes/unreleased/Dependencies-20230814-224757.yaml new file mode 100644 index 000000000..7014382bd --- /dev/null +++ b/.changes/unreleased/Dependencies-20230814-224757.yaml @@ -0,0 +1,6 @@ +kind: "Dependencies" +body: "Bump mypy from 1.4.1 to 1.5.0" +time: 2023-08-14T22:47:57.00000Z +custom: + Author: dependabot[bot] + PR: 872 diff --git a/.changes/unreleased/Features-20230707-104150.yaml b/.changes/unreleased/Features-20230707-104150.yaml new file mode 100644 index 000000000..183a37b45 --- /dev/null +++ b/.changes/unreleased/Features-20230707-104150.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Support server_side_parameters for Spark session connection method +time: 2023-07-07T10:41:50.01541+02:00 +custom: + Author: alarocca-apixio + Issue: "690" diff --git a/.changes/unreleased/Features-20230707-113337.yaml b/.changes/unreleased/Features-20230707-113337.yaml new file mode 100644 index 000000000..de0a50fe8 --- /dev/null +++ b/.changes/unreleased/Features-20230707-113337.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add server_side_parameters to HTTP connection method +time: 2023-07-07T11:33:37.794112+02:00 +custom: + Author: Fokko,JCZuurmond + Issue: "824" diff --git a/.changes/unreleased/Features-20230707-114650.yaml b/.changes/unreleased/Features-20230707-114650.yaml new file mode 100644 index 000000000..6f1b3d38a --- /dev/null +++ b/.changes/unreleased/Features-20230707-114650.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Enforce server side parameters keys and values to be strings +time: 2023-07-07T11:46:50.390918+02:00 +custom: + Author: Fokko,JCZuurmond + Issue: "826" diff --git a/.changes/unreleased/Features-20230707-135442.yaml b/.changes/unreleased/Features-20230707-135442.yaml new file mode 100644 index 000000000..39b119527 --- /dev/null +++ b/.changes/unreleased/Features-20230707-135442.yaml @@ -0,0 +1,6 @@ +kind: Features +body: Add SessionConnectionWrapper +time: 2023-07-07T13:54:42.41341+02:00 +custom: + Author: Fokko + Issue: "829" diff --git a/.changes/unreleased/Fixes-20230123-134955.yaml b/.changes/unreleased/Fixes-20230123-134955.yaml deleted file mode 100644 index a40d912ba..000000000 --- a/.changes/unreleased/Fixes-20230123-134955.yaml +++ /dev/null @@ -1,7 +0,0 @@ -kind: Fixes -body: add merge_exclude_columns tests -time: 2023-01-23T13:49:55.74249-06:00 -custom: - Author: dave-connors-3 - Issue: "00" - PR: "600" diff --git a/.changes/unreleased/Fixes-20230517-142331.yaml b/.changes/unreleased/Fixes-20230517-142331.yaml new file mode 100644 index 000000000..9f90e48b3 --- /dev/null +++ b/.changes/unreleased/Fixes-20230517-142331.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Wrap AnalysisException into DbtRuntimeError +time: 2023-05-17T14:23:31.263799+02:00 +custom: + Author: Fokko + Issue: "782" diff --git a/.changes/unreleased/Fixes-20230810-014122.yaml 
b/.changes/unreleased/Fixes-20230810-014122.yaml new file mode 100644 index 000000000..fcb34237e --- /dev/null +++ b/.changes/unreleased/Fixes-20230810-014122.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: include tblproperties macro in adapters.sql create table +time: 2023-08-10T01:41:22.782982+08:00 +custom: + Author: etheleon + Issue: "865" diff --git a/.changes/unreleased/Under the Hood-20230130-170310.yaml b/.changes/unreleased/Under the Hood-20230130-170310.yaml deleted file mode 100644 index c9131b6b3..000000000 --- a/.changes/unreleased/Under the Hood-20230130-170310.yaml +++ /dev/null @@ -1,6 +0,0 @@ -kind: Under the Hood -body: 'remove tox call to integration tests' -time: 2023-01-30T17:03:10.031843-08:00 -custom: - Author: colin-rogers-dbt - Issue: "584" diff --git a/.changes/unreleased/Under the Hood-20230724-165508.yaml b/.changes/unreleased/Under the Hood-20230724-165508.yaml new file mode 100644 index 000000000..889484644 --- /dev/null +++ b/.changes/unreleased/Under the Hood-20230724-165508.yaml @@ -0,0 +1,6 @@ +kind: Under the Hood +body: Update stale workflow to use centralized version +time: 2023-07-24T16:55:08.096947-04:00 +custom: + Author: mikealfare + Issue: "842" diff --git a/.circleci/config.yml b/.circleci/config.yml index 5fea78c3f..f2a3b6357 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -10,23 +10,25 @@ jobs: - checkout - run: tox -e flake8,unit +# Turning off for now due to flaky runs of tests will turn back on at later date. integration-spark-session: - environment: - DBT_INVOCATION_ENV: circle - docker: - - image: godatadriven/pyspark:3.1 - steps: - - checkout - - run: apt-get update - - run: python3 -m pip install --upgrade pip - - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev - - run: python3 -m pip install tox - - run: - name: Run integration tests - command: tox -e integration-spark-session - no_output_timeout: 1h - - store_artifacts: - path: ./logs + environment: + DBT_INVOCATION_ENV: circle + docker: + - image: godatadriven/pyspark:3.1 + steps: + - checkout + - run: apt-get update + - run: conda install python=3.10 + - run: python3 -m pip install --upgrade pip + - run: apt-get install -y git gcc g++ unixodbc-dev libsasl2-dev libxml2-dev libxslt-dev + - run: python3 -m pip install tox + - run: + name: Run integration tests + command: tox -e integration-spark-session + no_output_timeout: 1h + - store_artifacts: + path: ./logs integration-spark-thrift: environment: diff --git a/.flake8 b/.flake8 index f39d154c0..bbc3202a0 100644 --- a/.flake8 +++ b/.flake8 @@ -4,9 +4,11 @@ select = W F ignore = - W503 # makes Flake8 work like black - W504 - E203 # makes Flake8 work like black - E741 - E501 + # makes Flake8 work like black + W503, + W504, + # makes Flake8 work like black + E203, + E741, + E501, exclude = test diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..f6283d123 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,3 @@ +# This codeowners file is used to ensure all PRs require reviews from the adapters team + +* @dbt-labs/core-adapters diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 11381456a..a3c340cc3 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,26 +1,35 @@ resolves # +[docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose) dbt-labs/docs.getdbt.com/# -### Description +### Problem + +### Solution + + ### Checklist -- [ ] I have read [the contributing 
guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md) and understand what's expected of me -- [ ] I have signed the [CLA](https://docs.getdbt.com/docs/contributor-license-agreements) +- [ ] I have read [the contributing guide](https://github.com/dbt-labs/dbt-core/blob/main/CONTRIBUTING.md) and understand what's expected of me - [ ] I have run this code in development and it appears to resolve the stated issue - [ ] This PR includes tests, or tests are not required/relevant for this PR -- [ ] I have [opened an issue to add/update docs](https://github.com/dbt-labs/docs.getdbt.com/issues/new/choose), or docs changes are not required/relevant for this PR -- [ ] I have run `changie new` to [create a changelog entry](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#Adding-CHANGELOG-Entry) +- [ ] This PR has no interface changes (e.g. macros, cli, logs, json artifacts, config files, adapter interface, etc) or this PR has already received feedback and approval from Product or DX diff --git a/.github/scripts/update_dependencies.sh b/.github/scripts/update_dependencies.sh new file mode 100644 index 000000000..c3df48e52 --- /dev/null +++ b/.github/scripts/update_dependencies.sh @@ -0,0 +1,15 @@ +#!/bin/bash -e +set -e + +git_branch=$1 +target_req_file="dev-requirements.txt" +core_req_sed_pattern="s|dbt-core.git.*#egg=dbt-core|dbt-core.git@${git_branch}#egg=dbt-core|g" +tests_req_sed_pattern="s|dbt-core.git.*#egg=dbt-tests|dbt-core.git@${git_branch}#egg=dbt-tests|g" +if [[ "$OSTYPE" == darwin* ]]; then + # mac ships with a different version of sed that requires a delimiter arg + sed -i "" "$core_req_sed_pattern" $target_req_file + sed -i "" "$tests_req_sed_pattern" $target_req_file +else + sed -i "$core_req_sed_pattern" $target_req_file + sed -i "$tests_req_sed_pattern" $target_req_file +fi diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml index 8c0355bda..ea80cbc24 100644 --- a/.github/workflows/backport.yml +++ b/.github/workflows/backport.yml @@ -37,6 +37,6 @@ jobs: github.event.pull_request.merged && contains(github.event.label.name, 'backport') steps: - - uses: tibdex/backport@v2.0.2 + - uses: tibdex/backport@v2 with: github_token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/bot-changelog.yml b/.github/workflows/bot-changelog.yml index 92aff8eb0..89972070e 100644 --- a/.github/workflows/bot-changelog.yml +++ b/.github/workflows/bot-changelog.yml @@ -49,7 +49,7 @@ jobs: - name: Create and commit changelog on bot PR if: ${{ contains(github.event.pull_request.labels.*.name, matrix.label) }} id: bot_changelog - uses: emmyoop/changie_bot@v1.0.1 + uses: emmyoop/changie_bot@v1 with: GITHUB_TOKEN: ${{ secrets.FISHTOWN_BOT_PAT }} commit_author_name: "Github Build Bot" diff --git a/.github/workflows/cut-release-branch.yml b/.github/workflows/cut-release-branch.yml new file mode 100644 index 000000000..f8dfa2173 --- /dev/null +++ b/.github/workflows/cut-release-branch.yml @@ -0,0 +1,42 @@ +# **what?** +# Calls a centralize3d workflow that will: +# 1. Cut a new branch (generally `*.latest`) +# 2. Also cleans up all files in `.changes/unreleased` and `.changes/previous version on +# `main` and bumps `main` to the input version. + +# **why?** +# Generally reduces the workload of engineers and reduces error. Allow automation. + +# **when?** +# This will run when called manually. 
+ +name: Cut new release branch + +on: + workflow_dispatch: + inputs: + version_to_bump_main: + description: 'The alpha version main should bump to (ex. 1.6.0a1)' + required: true + new_branch_name: + description: 'The full name of the new branch (ex. 1.5.latest)' + required: true + +defaults: + run: + shell: bash + +permissions: + contents: write + +jobs: + cut_branch: + name: "Cut branch and clean up main for dbt-spark" + uses: dbt-labs/actions/.github/workflows/cut-release-branch.yml@main + with: + version_to_bump_main: ${{ inputs.version_to_bump_main }} + new_branch_name: ${{ inputs.new_branch_name }} + PR_title: "Cleanup main after cutting new ${{ inputs.new_branch_name }} branch" + PR_body: "This PR will fail CI until the dbt-core PR has been merged due to release version conflicts." + secrets: + FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }} diff --git a/.github/workflows/jira-creation.yml b/.github/workflows/jira-creation.yml index b4016befc..2611a8bdd 100644 --- a/.github/workflows/jira-creation.yml +++ b/.github/workflows/jira-creation.yml @@ -19,7 +19,9 @@ permissions: jobs: call-label-action: - uses: dbt-labs/jira-actions/.github/workflows/jira-creation.yml@main + uses: dbt-labs/actions/.github/workflows/jira-creation.yml@main + with: + project_key: ADAP secrets: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} diff --git a/.github/workflows/jira-label.yml b/.github/workflows/jira-label.yml index 3da2e3a38..1637cbe38 100644 --- a/.github/workflows/jira-label.yml +++ b/.github/workflows/jira-label.yml @@ -19,7 +19,9 @@ permissions: jobs: call-label-action: - uses: dbt-labs/jira-actions/.github/workflows/jira-label.yml@main + uses: dbt-labs/actions/.github/workflows/jira-label.yml@main + with: + project_key: ADAP secrets: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} diff --git a/.github/workflows/jira-transition.yml b/.github/workflows/jira-transition.yml index ed9f9cd4f..99158a15f 100644 --- a/.github/workflows/jira-transition.yml +++ b/.github/workflows/jira-transition.yml @@ -15,9 +15,14 @@ on: issues: types: [closed, deleted, reopened] +# no special access is needed +permissions: read-all + jobs: call-label-action: - uses: dbt-labs/jira-actions/.github/workflows/jira-transition.yml@main + uses: dbt-labs/actions/.github/workflows/jira-transition.yml@main + with: + project_key: ADAP secrets: JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ff1c92ce3..6b3d93b6e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -43,17 +43,18 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: persist-credentials: false - name: Set up Python - uses: actions/setup-python@v4.3.0 + uses: actions/setup-python@v4 with: python-version: '3.8' - name: Install python dependencies run: | + sudo apt-get update sudo apt-get install libsasl2-dev python -m pip install --user --upgrade pip python -m pip --version @@ -78,7 +79,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] env: TOXENV: "unit" @@ -86,15 +87,16 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4.3.0 + uses: actions/setup-python@v4 with: 
python-version: ${{ matrix.python-version }} - name: Install python dependencies run: | + sudo apt-get update sudo apt-get install libsasl2-dev python -m pip install --user --upgrade pip python -m pip --version @@ -106,9 +108,9 @@ jobs: - name: Get current date if: always() id: date - run: echo "::set-output name=date::$(date +'%Y-%m-%dT%H_%M_%S')" #no colons allowed for artifacts + run: echo "date=$(date +'%Y-%m-%dT%H_%M_%S')" >> $GITHUB_OUTPUT #no colons allowed for artifacts - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 if: always() with: name: unit_results_${{ matrix.python-version }}-${{ steps.date.outputs.date }}.csv @@ -124,10 +126,10 @@ jobs: steps: - name: Check out the repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Set up Python - uses: actions/setup-python@v4.3.0 + uses: actions/setup-python@v4 with: python-version: '3.8' @@ -155,9 +157,9 @@ jobs: run: | export is_alpha=0 if [[ "$(ls -lh dist/)" == *"a1"* ]]; then export is_alpha=1; fi - echo "::set-output name=is_alpha::$is_alpha" + echo "is_alpha=$is_alpha" >> $GITHUB_OUTPUT - - uses: actions/upload-artifact@v2 + - uses: actions/upload-artifact@v3 with: name: dist path: dist/ @@ -175,11 +177,11 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: ["3.7", "3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10"] steps: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4.3.0 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} @@ -188,7 +190,7 @@ jobs: python -m pip install --user --upgrade pip python -m pip install --upgrade wheel python -m pip --version - - uses: actions/download-artifact@v2 + - uses: actions/download-artifact@v3 with: name: dist path: dist/ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 0aa1298c2..df30809a9 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -98,8 +98,7 @@ jobs: env_setup_script_path: "" test_run: ${{ inputs.test_run }} - secrets: - FISHTOWN_BOT_PAT: ${{ secrets.FISHTOWN_BOT_PAT }} + secrets: inherit log-outputs-audit-version-and-changelog: name: "[Log output] Bump package version, Generate changelog" diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index a56455d55..d902340a9 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -1,15 +1,12 @@ -name: 'Close stale issues and PRs' +name: "Close stale issues and PRs" on: schedule: - cron: "30 1 * * *" + +permissions: + issues: write + pull-requests: write + jobs: stale: - runs-on: ubuntu-latest - steps: - # pinned at v4 (https://github.com/actions/stale/releases/tag/v4.0.0) - - uses: actions/stale@cdf15f641adb27a71842045a94023bef6945e3aa - with: - stale-issue-message: "This issue has been marked as Stale because it has been open for 180 days with no activity. If you would like the issue to remain open, please remove the stale label or comment on the issue, or it will be closed in 7 days." - stale-pr-message: "This PR has been marked as Stale because it has been open for 180 days with no activity. If you would like the PR to remain open, please remove the stale label or comment on the PR, or it will be closed in 7 days." 
- # mark issues/PRs stale when they haven't seen activity in 180 days - days-before-stale: 180 + uses: dbt-labs/actions/.github/workflows/stale-bot-matrix.yml@main diff --git a/.github/workflows/triage-labels.yml b/.github/workflows/triage-labels.yml new file mode 100644 index 000000000..91f529e3e --- /dev/null +++ b/.github/workflows/triage-labels.yml @@ -0,0 +1,31 @@ +# **what?** +# When the core team triages, we sometimes need more information from the issue creator. In +# those cases we remove the `triage` label and add the `awaiting_response` label. Once we +# recieve a response in the form of a comment, we want the `awaiting_response` label removed +# in favor of the `triage` label so we are aware that the issue needs action. + +# **why?** +# To help with out team triage issue tracking + +# **when?** +# This will run when a comment is added to an issue and that issue has to `awaiting_response` label. + +name: Update Triage Label + +on: issue_comment + +defaults: + run: + shell: bash + +permissions: + issues: write + +jobs: + triage_label: + if: contains(github.event.issue.labels.*.name, 'awaiting_response') + uses: dbt-labs/actions/.github/workflows/swap-labels.yml@main + with: + add_label: "triage" + remove_label: "awaiting_response" + secrets: inherit # this is only acceptable because we own the action we're calling diff --git a/.gitignore b/.gitignore index 189589cf4..33a83848c 100644 --- a/.gitignore +++ b/.gitignore @@ -44,4 +44,3 @@ test.env .hive-metastore/ .spark-warehouse/ dbt-integration-tests -test/integration/.user.yml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4b098e0c2..5e7fdbd04 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,15 +1,12 @@ # For more on configuring pre-commit hooks (see https://pre-commit.com/) -# TODO: remove global exclusion of tests when testing overhaul is complete -exclude: '^tests/.*' - # Force all unspecified python hooks to run python 3.8 default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.4.0 hooks: - id: check-yaml args: [--unsafe] @@ -18,31 +15,31 @@ repos: - id: trailing-whitespace - id: check-case-conflict - repo: https://github.com/psf/black - rev: 21.12b0 + rev: 23.1.0 hooks: - id: black - additional_dependencies: ['click==8.0.4'] + additional_dependencies: ['click~=8.1'] args: - "--line-length=99" - "--target-version=py38" - id: black alias: black-check stages: [manual] - additional_dependencies: ['click==8.0.4'] + additional_dependencies: ['click~=8.1'] args: - "--line-length=99" - "--target-version=py38" - "--check" - "--diff" - repo: https://github.com/pycqa/flake8 - rev: 4.0.1 + rev: 6.0.0 hooks: - id: flake8 - id: flake8 alias: flake8-check stages: [manual] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.950 + rev: v1.2.0 hooks: - id: mypy # N.B.: Mypy is... a bit fragile. @@ -55,12 +52,12 @@ repos: # of our control to the mix. Unfortunately, there's nothing we can # do about per pre-commit's author. # See https://github.com/pre-commit/pre-commit/issues/730 for details. 
- args: [--show-error-codes, --ignore-missing-imports] + args: [--show-error-codes, --ignore-missing-imports, --explicit-package-bases, --warn-unused-ignores, --disallow-untyped-defs] files: ^dbt/adapters/.* language: system - id: mypy alias: mypy-check stages: [manual] - args: [--show-error-codes, --pretty, --ignore-missing-imports] + args: [--show-error-codes, --pretty, --ignore-missing-imports, --explicit-package-bases] files: ^dbt/adapters language: system diff --git a/CHANGELOG.md b/CHANGELOG.md index f99421c84..902db37fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,8 +4,11 @@ - Changes are listed under the (pre)release in which they first appear. Subsequent releases include changes from previous releases. - "Breaking changes" listed under a version may require action from end users or external maintainers when upgrading to that version. - Do not edit this file directly. This file is auto-generated using [changie](https://github.com/miniscruff/changie). For details on how to document a change, see [the contributing guide](https://github.com/dbt-labs/dbt-spark/blob/main/CONTRIBUTING.md#adding-changelog-entry) + ## Previous Releases For information on prior major and minor releases, see their changelogs: +- [1.6](https://github.com/dbt-labs/dbt-spark/blob/1.6.latest/CHANGELOG.md) +- [1.5](https://github.com/dbt-labs/dbt-spark/blob/1.5.latest/CHANGELOG.md) - [1.4](https://github.com/dbt-labs/dbt-spark/blob/1.4.latest/CHANGELOG.md) - [1.3](https://github.com/dbt-labs/dbt-spark/blob/1.3.latest/CHANGELOG.md) - [1.2](https://github.com/dbt-labs/dbt-spark/blob/1.2.latest/CHANGELOG.md) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 57d6ee474..a61306ea5 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -66,7 +66,7 @@ $EDITOR test.env There are a few methods for running tests locally. #### `tox` -`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.7, Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py37`. The configuration of these tests are located in `tox.ini`. +`tox` takes care of managing Python virtualenvs and installing dependencies in order to run tests. You can also run tests in parallel, for example you can run unit tests for Python 3.8, Python 3.9, and `flake8` checks in parallel with `tox -p`. Also, you can run unit tests for specific python versions with `tox -e py38`. The configuration of these tests are located in `tox.ini`. #### `pytest` Finally, you can also run a specific test or group of tests using `pytest` directly. With a Python virtualenv active and dev dependencies installed you can do things like: diff --git a/Makefile b/Makefile index 8b251662b..876440a01 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,14 @@ .DEFAULT_GOAL:=help .PHONY: dev -dev: ## Installs adapter in develop mode along with development depedencies +dev: ## Installs adapter in develop mode along with development dependencies @\ - pip install -r requirements.txt -r dev-requirements.txt && pre-commit install + pip install -e . 
-r requirements.txt -r dev-requirements.txt && pre-commit install + +.PHONY: dev-uninstall +dev-uninstall: ## Uninstalls all packages while maintaining the virtual environment + ## Useful when updating versions, or if you accidentally installed into the system interpreter + pip freeze | grep -v "^-e" | cut -d "@" -f1 | xargs pip uninstall -y .PHONY: mypy mypy: ## Runs mypy against staged changes for static type checking. diff --git a/README.md b/README.md index 3d9f75d36..fa286b1f7 100644 --- a/README.md +++ b/README.md @@ -5,8 +5,8 @@ Unit Tests Badge - - Integration Tests Badge + + Integration Tests Badge

diff --git a/dbt/adapters/spark/__init__.py b/dbt/adapters/spark/__init__.py index 91ad54768..c25ba40d5 100644 --- a/dbt/adapters/spark/__init__.py +++ b/dbt/adapters/spark/__init__.py @@ -5,8 +5,8 @@ from dbt.adapters.spark.impl import SparkAdapter from dbt.adapters.base import AdapterPlugin -from dbt.include import spark # type: ignore +from dbt.include import spark Plugin = AdapterPlugin( - adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH + adapter=SparkAdapter, credentials=SparkCredentials, include_path=spark.PACKAGE_PATH # type: ignore ) diff --git a/dbt/adapters/spark/__version__.py b/dbt/adapters/spark/__version__.py index 219c289b1..874bd74c8 100644 --- a/dbt/adapters/spark/__version__.py +++ b/dbt/adapters/spark/__version__.py @@ -1 +1 @@ -version = "1.5.0a1" +version = "1.7.0a1" diff --git a/dbt/adapters/spark/column.py b/dbt/adapters/spark/column.py index 8100fa450..bde49a492 100644 --- a/dbt/adapters/spark/column.py +++ b/dbt/adapters/spark/column.py @@ -26,7 +26,7 @@ def can_expand_to(self: Self, other_column: Self) -> bool: # type: ignore """returns True if both columns are strings""" return self.is_string() and other_column.is_string() - def literal(self, value): + def literal(self, value: Any) -> str: return "cast({} as {})".format(value, self.dtype) @property diff --git a/dbt/adapters/spark/connections.py b/dbt/adapters/spark/connections.py index 13c1223cf..a939ae753 100644 --- a/dbt/adapters/spark/connections.py +++ b/dbt/adapters/spark/connections.py @@ -22,10 +22,12 @@ pyodbc = None from datetime import datetime import sqlparams - +from dbt.contracts.connection import Connection from hologram.helpers import StrEnum from dataclasses import dataclass, field -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Union, Tuple, List, Generator, Iterable, Sequence + +from abc import ABC, abstractmethod try: from thrift.transport.TSSLSocket import TSSLSocket @@ -44,7 +46,7 @@ NUMBERS = DECIMALS + (int, float) -def _build_odbc_connnection_string(**kwargs) -> str: +def _build_odbc_connnection_string(**kwargs: Any) -> str: return ";".join([f"{k}={v}" for k, v in kwargs.items()]) @@ -73,21 +75,21 @@ class SparkCredentials(Credentials): connect_retries: int = 0 connect_timeout: int = 10 use_ssl: bool = False - server_side_parameters: Dict[str, Any] = field(default_factory=dict) + server_side_parameters: Dict[str, str] = field(default_factory=dict) retry_all: bool = False @classmethod - def __pre_deserialize__(cls, data): + def __pre_deserialize__(cls, data: Any) -> Any: data = super().__pre_deserialize__(data) if "database" not in data: data["database"] = None return data @property - def cluster_id(self): + def cluster_id(self) -> Optional[str]: return self.cluster - def __post_init__(self): + def __post_init__(self) -> None: # spark classifies database and schema as the same thing if self.database is not None and self.database != self.schema: raise dbt.exceptions.DbtRuntimeError( @@ -139,32 +141,77 @@ def __post_init__(self): f"ImportError({e.msg})" ) from e + if self.method != SparkConnectionMethod.SESSION: + self.host = self.host.rstrip("/") + + self.server_side_parameters = { + str(key): str(value) for key, value in self.server_side_parameters.items() + } + @property - def type(self): + def type(self) -> str: return "spark" @property - def unique_field(self): + def unique_field(self) -> str: return self.host - def _connection_keys(self): - return ("host", "port", "cluster", "endpoint", "schema", "organization") + def 
_connection_keys(self) -> Tuple[str, ...]: + return "host", "port", "cluster", "endpoint", "schema", "organization" + + +class SparkConnectionWrapper(ABC): + @abstractmethod + def cursor(self) -> "SparkConnectionWrapper": + pass + + @abstractmethod + def cancel(self) -> None: + pass + + @abstractmethod + def close(self) -> None: + pass + + @abstractmethod + def rollback(self) -> None: + pass + + @abstractmethod + def fetchall(self) -> Optional[List]: + pass + @abstractmethod + def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None: + pass -class PyhiveConnectionWrapper(object): + @property + @abstractmethod + def description( + self, + ) -> Sequence[ + Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool] + ]: + pass + + +class PyhiveConnectionWrapper(SparkConnectionWrapper): """Wrap a Spark connection in a way that no-ops transactions""" # https://forums.databricks.com/questions/2157/in-apache-spark-sql-can-we-roll-back-the-transacti.html # noqa - def __init__(self, handle): + handle: "pyodbc.Connection" + _cursor: "Optional[pyodbc.Cursor]" + + def __init__(self, handle: "pyodbc.Connection") -> None: self.handle = handle self._cursor = None - def cursor(self): + def cursor(self) -> "PyhiveConnectionWrapper": self._cursor = self.handle.cursor() return self - def cancel(self): + def cancel(self) -> None: if self._cursor: # Handle bad response in the pyhive lib when # the connection is cancelled @@ -173,7 +220,7 @@ def cancel(self): except EnvironmentError as exc: logger.debug("Exception while cancelling query: {}".format(exc)) - def close(self): + def close(self) -> None: if self._cursor: # Handle bad response in the pyhive lib when # the connection is cancelled @@ -183,13 +230,14 @@ def close(self): logger.debug("Exception while closing cursor: {}".format(exc)) self.handle.close() - def rollback(self, *args, **kwargs): + def rollback(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: rollback") - def fetchall(self): + def fetchall(self) -> List["pyodbc.Row"]: + assert self._cursor, "Cursor not available" return self._cursor.fetchall() - def execute(self, sql, bindings=None): + def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None: if sql.strip().endswith(";"): sql = sql.strip()[:-1] @@ -211,6 +259,8 @@ def execute(self, sql, bindings=None): if bindings is not None: bindings = [self._fix_binding(binding) for binding in bindings] + assert self._cursor, "Cursor not available" + self._cursor.execute(sql, bindings, async_=True) poll_state = self._cursor.poll() state = poll_state.operationState @@ -244,7 +294,7 @@ def execute(self, sql, bindings=None): logger.debug("Poll status: {}, query complete".format(state)) @classmethod - def _fix_binding(cls, value): + def _fix_binding(cls, value: Any) -> Union[float, str]: """Convert complex datatypes to primitives that can be loaded by the Spark driver""" if isinstance(value, NUMBERS): @@ -255,12 +305,18 @@ def _fix_binding(cls, value): return value @property - def description(self): + def description( + self, + ) -> Sequence[ + Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool] + ]: + assert self._cursor, "Cursor not available" return self._cursor.description class PyodbcConnectionWrapper(PyhiveConnectionWrapper): - def execute(self, sql, bindings=None): + def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None: + assert self._cursor, "Cursor not available" if sql.strip().endswith(";"): sql = sql.strip()[:-1] # pyodbc 
does not handle a None type binding! @@ -281,7 +337,7 @@ class SparkConnectionManager(SQLConnectionManager): SPARK_CONNECTION_URL = "{host}:{port}" + SPARK_CLUSTER_HTTP_PATH @contextmanager - def exception_handler(self, sql): + def exception_handler(self, sql: str) -> Generator[None, None, None]: try: yield @@ -298,30 +354,30 @@ def exception_handler(self, sql): else: raise dbt.exceptions.DbtRuntimeError(str(exc)) - def cancel(self, connection): + def cancel(self, connection: Connection) -> None: connection.handle.cancel() @classmethod - def get_response(cls, cursor) -> AdapterResponse: + def get_response(cls, cursor: Any) -> AdapterResponse: # https://github.com/dbt-labs/dbt-spark/issues/142 message = "OK" return AdapterResponse(_message=message) # No transactions on Spark.... - def add_begin_query(self, *args, **kwargs): + def add_begin_query(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: add_begin_query") - def add_commit_query(self, *args, **kwargs): + def add_commit_query(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: add_commit_query") - def commit(self, *args, **kwargs): + def commit(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: commit") - def rollback(self, *args, **kwargs): + def rollback(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: rollback") @classmethod - def validate_creds(cls, creds, required): + def validate_creds(cls, creds: Any, required: Iterable[str]) -> None: method = creds.method for key in required: @@ -332,13 +388,14 @@ def validate_creds(cls, creds, required): ) @classmethod - def open(cls, connection): + def open(cls, connection: Connection) -> Connection: if connection.state == ConnectionState.OPEN: logger.debug("Connection is already open, skipping open.") return connection creds = connection.credentials exc = None + handle: SparkConnectionWrapper for i in range(1 + creds.connect_retries): try: @@ -365,7 +422,10 @@ def open(cls, connection): token = base64.standard_b64encode(raw_token).decode() transport.setCustomHeaders({"Authorization": "Basic {}".format(token)}) - conn = hive.connect(thrift_transport=transport) + conn = hive.connect( + thrift_transport=transport, + configuration=creds.server_side_parameters, + ) handle = PyhiveConnectionWrapper(conn) elif creds.method == SparkConnectionMethod.THRIFT: cls.validate_creds(creds, ["host", "port", "user", "schema"]) @@ -453,7 +513,9 @@ def open(cls, connection): SessionConnectionWrapper, ) - handle = SessionConnectionWrapper(Connection()) + handle = SessionConnectionWrapper( + Connection(server_side_parameters=creds.server_side_parameters) + ) else: raise dbt.exceptions.DbtProfileError( f"invalid credential method: {creds.method}" @@ -490,14 +552,34 @@ def open(cls, connection): else: raise dbt.exceptions.FailedToConnectError("failed to connect") from e else: - raise exc + raise exc # type: ignore connection.handle = handle connection.state = ConnectionState.OPEN return connection - -def build_ssl_transport(host, port, username, auth, kerberos_service_name, password=None): + @classmethod + def data_type_code_to_name(cls, type_code: Union[type, str]) -> str: # type: ignore + """ + :param Union[type, str] type_code: The sql to execute. + * type_code is a python type (!) in pyodbc https://github.com/mkleehammer/pyodbc/wiki/Cursor#description, and a string for other spark runtimes. 
+ * ignoring the type annotation on the signature for this adapter instead of updating the base class because this feels like a really special case. + :return: stringified the cursor type_code + :rtype: str + """ + if isinstance(type_code, str): + return type_code + return type_code.__name__.upper() + + +def build_ssl_transport( + host: str, + port: int, + username: str, + auth: str, + kerberos_service_name: str, + password: Optional[str] = None, +) -> "thrift_sasl.TSaslClientTransport": transport = None if port is None: port = 10000 @@ -521,7 +603,7 @@ def build_ssl_transport(host, port, username, auth, kerberos_service_name, passw # to be nonempty. password = "x" - def sasl_factory(): + def sasl_factory() -> sasl.Client: sasl_client = sasl.Client() sasl_client.setAttr("host", host) if sasl_auth == "GSSAPI": diff --git a/dbt/adapters/spark/impl.py b/dbt/adapters/spark/impl.py index fea5bbacf..2864c4f30 100644 --- a/dbt/adapters/spark/impl.py +++ b/dbt/adapters/spark/impl.py @@ -1,18 +1,20 @@ import re from concurrent.futures import Future from dataclasses import dataclass -from typing import Any, Dict, Iterable, List, Optional, Union, Type +from typing import Any, Dict, Iterable, List, Optional, Union, Type, Tuple, Callable, Set + +from dbt.adapters.base.relation import InformationSchema +from dbt.contracts.graph.manifest import Manifest + from typing_extensions import TypeAlias import agate -from dbt.contracts.relation import RelationType import dbt import dbt.exceptions from dbt.adapters.base import AdapterConfig, PythonJobHelper -from dbt.adapters.base.impl import catch_as_completed -from dbt.contracts.connection import AdapterResponse +from dbt.adapters.base.impl import catch_as_completed, ConstraintSupport from dbt.adapters.sql import SQLAdapter from dbt.adapters.spark import SparkConnectionManager from dbt.adapters.spark import SparkRelation @@ -23,8 +25,10 @@ ) from dbt.adapters.base import BaseRelation from dbt.clients.agate_helper import DEFAULT_TYPE_TESTER +from dbt.contracts.connection import AdapterResponse +from dbt.contracts.graph.nodes import ConstraintType +from dbt.contracts.relation import RelationType from dbt.events import AdapterLogger -from dbt.flags import get_flags from dbt.utils import executor, AttrDict logger = AdapterLogger("Spark") @@ -32,8 +36,8 @@ GET_COLUMNS_IN_RELATION_RAW_MACRO_NAME = "get_columns_in_relation_raw" LIST_SCHEMAS_MACRO_NAME = "list_schemas" LIST_RELATIONS_MACRO_NAME = "list_relations_without_caching" -DROP_RELATION_MACRO_NAME = "drop_relation" -FETCH_TBL_PROPERTIES_MACRO_NAME = "fetch_tbl_properties" +LIST_RELATIONS_SHOW_TABLES_MACRO_NAME = "list_relations_show_tables_without_caching" +DESCRIBE_TABLE_EXTENDED_MACRO_NAME = "describe_table_extended_without_caching" KEY_TABLE_OWNER = "Owner" KEY_TABLE_STATISTICS = "Statistics" @@ -80,6 +84,7 @@ class SparkAdapter(SQLAdapter): INFORMATION_COLUMNS_REGEX = re.compile(r"^ \|-- (.*): (.*) \(nullable = (.*)\b", re.MULTILINE) INFORMATION_OWNER_REGEX = re.compile(r"^Owner: (.*)$", re.MULTILINE) INFORMATION_STATISTICS_REGEX = re.compile(r"^Statistics: (.*)$", re.MULTILINE) + HUDI_METADATA_COLUMNS = [ "_hoodie_commit_time", "_hoodie_commit_seqno", @@ -88,7 +93,16 @@ class SparkAdapter(SQLAdapter): "_hoodie_file_name", ] + CONSTRAINT_SUPPORT = { + ConstraintType.check: ConstraintSupport.NOT_ENFORCED, + ConstraintType.not_null: ConstraintSupport.NOT_ENFORCED, + ConstraintType.unique: ConstraintSupport.NOT_ENFORCED, + ConstraintType.primary_key: ConstraintSupport.NOT_ENFORCED, + ConstraintType.foreign_key: 
ConstraintSupport.NOT_ENFORCED, + } + Relation: TypeAlias = SparkRelation + RelationInfo = Tuple[str, str, str] Column: TypeAlias = SparkColumn ConnectionManager: TypeAlias = SparkConnectionManager AdapterSpecificConfigs: TypeAlias = SparkConfig @@ -98,86 +112,145 @@ def date_function(cls) -> str: return "current_timestamp()" @classmethod - def convert_text_type(cls, agate_table, col_idx): + def convert_text_type(cls, agate_table: agate.Table, col_idx: int) -> str: return "string" @classmethod - def convert_number_type(cls, agate_table, col_idx): + def convert_number_type(cls, agate_table: agate.Table, col_idx: int) -> str: decimals = agate_table.aggregate(agate.MaxPrecision(col_idx)) return "double" if decimals else "bigint" @classmethod - def convert_date_type(cls, agate_table, col_idx): + def convert_date_type(cls, agate_table: agate.Table, col_idx: int) -> str: return "date" @classmethod - def convert_time_type(cls, agate_table, col_idx): + def convert_time_type(cls, agate_table: agate.Table, col_idx: int) -> str: return "time" @classmethod - def convert_datetime_type(cls, agate_table, col_idx): + def convert_datetime_type(cls, agate_table: agate.Table, col_idx: int) -> str: return "timestamp" - def quote(self, identifier): + def quote(self, identifier: str) -> str: # type: ignore return "`{}`".format(identifier) - def add_schema_to_cache(self, schema) -> str: - """Cache a new schema in dbt. It will show up in `list relations`.""" - if schema is None: - name = self.nice_connection_name() - raise dbt.exceptions.CompilationError( - "Attempted to cache a null schema for {}".format(name) + def _get_relation_information(self, row: agate.Row) -> RelationInfo: + """relation info was fetched with SHOW TABLES EXTENDED""" + try: + _schema, name, _, information = row + except ValueError: + raise dbt.exceptions.DbtRuntimeError( + f'Invalid value from "show tables extended ...", got {len(row)} values, expected 4' ) - if get_flags().USE_CACHE: # type: ignore - self.cache.add_schema(None, schema) - # so jinja doesn't render things - return "" - - def list_relations_without_caching( - self, schema_relation: SparkRelation - ) -> List[SparkRelation]: - kwargs = {"schema_relation": schema_relation} + + return _schema, name, information + + def _get_relation_information_using_describe(self, row: agate.Row) -> RelationInfo: + """Relation info fetched using SHOW TABLES and an auxiliary DESCRIBE statement""" + try: + _schema, name, _ = row + except ValueError: + raise dbt.exceptions.DbtRuntimeError( + f'Invalid value from "show tables ...", got {len(row)} values, expected 3' + ) + + table_name = f"{_schema}.{name}" try: - results = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs) + table_results = self.execute_macro( + DESCRIBE_TABLE_EXTENDED_MACRO_NAME, kwargs={"table_name": table_name} + ) except dbt.exceptions.DbtRuntimeError as e: - errmsg = getattr(e, "msg", "") - if f"Database '{schema_relation}' not found" in errmsg: - return [] - else: - description = "Error while retrieving information about" - logger.debug(f"{description} {schema_relation}: {e.msg}") - return [] + logger.debug(f"Error while retrieving information about {table_name}: {e.msg}") + table_results = AttrDict() + + information = "" + for info_row in table_results: + info_type, info_value, _ = info_row + if not info_type.startswith("#"): + information += f"{info_type}: {info_value}\n" + + return _schema, name, information + def _build_spark_relation_list( + self, + row_list: agate.Table, + relation_info_func: 
Callable[[agate.Row], RelationInfo], + ) -> List[BaseRelation]: + """Aggregate relations with format metadata included.""" relations = [] - for row in results: - if len(row) != 4: - raise dbt.exceptions.DbtRuntimeError( - f'Invalid value from "show table extended ...", ' - f"got {len(row)} values, expected 4" - ) - _schema, name, _, information = row - rel_type = RelationType.View if "Type: VIEW" in information else RelationType.Table - is_delta = "Provider: delta" in information - is_hudi = "Provider: hudi" in information + for row in row_list: + _schema, name, information = relation_info_func(row) + + rel_type: RelationType = ( + RelationType.View if "Type: VIEW" in information else RelationType.Table + ) + is_delta: bool = "Provider: delta" in information + is_hudi: bool = "Provider: hudi" in information + is_iceberg: bool = "Provider: iceberg" in information + relation: BaseRelation = self.Relation.create( schema=_schema, identifier=name, type=rel_type, information=information, is_delta=is_delta, + is_iceberg=is_iceberg, is_hudi=is_hudi, ) relations.append(relation) return relations + def list_relations_without_caching(self, schema_relation: BaseRelation) -> List[BaseRelation]: + """Distinct Spark compute engines may not support the same SQL featureset. Thus, we must + try different methods to fetch relation information.""" + + kwargs = {"schema_relation": schema_relation} + + try: + # Default compute engine behavior: show tables extended + show_table_extended_rows = self.execute_macro(LIST_RELATIONS_MACRO_NAME, kwargs=kwargs) + return self._build_spark_relation_list( + row_list=show_table_extended_rows, + relation_info_func=self._get_relation_information, + ) + except dbt.exceptions.DbtRuntimeError as e: + errmsg = getattr(e, "msg", "") + if f"Database '{schema_relation}' not found" in errmsg: + return [] + # Iceberg compute engine behavior: show table + elif "SHOW TABLE EXTENDED is not supported for v2 tables" in errmsg: + # this happens with spark-iceberg with v2 iceberg tables + # https://issues.apache.org/jira/browse/SPARK-33393 + try: + # Iceberg behavior: 3-row result of relations obtained + show_table_rows = self.execute_macro( + LIST_RELATIONS_SHOW_TABLES_MACRO_NAME, kwargs=kwargs + ) + return self._build_spark_relation_list( + row_list=show_table_rows, + relation_info_func=self._get_relation_information_using_describe, + ) + except dbt.exceptions.DbtRuntimeError as e: + description = "Error while retrieving information about" + logger.debug(f"{description} {schema_relation}: {e.msg}") + return [] + else: + logger.debug( + f"Error while retrieving information about {schema_relation}: {errmsg}" + ) + return [] + def get_relation(self, database: str, schema: str, identifier: str) -> Optional[BaseRelation]: if not self.Relation.get_default_include_policy().database: database = None # type: ignore return super().get_relation(database, schema, identifier) - def parse_describe_extended(self, relation: Relation, raw_rows: AttrDict) -> List[SparkColumn]: + def parse_describe_extended( + self, relation: BaseRelation, raw_rows: AttrDict + ) -> List[SparkColumn]: # Convert the Row to a dict dict_rows = [dict(zip(row._keys, row._values)) for row in raw_rows] # Find the separator between the rows and the metadata provided @@ -214,7 +287,7 @@ def find_table_information_separator(rows: List[dict]) -> int: pos += 1 return pos - def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: + def get_columns_in_relation(self, relation: BaseRelation) -> List[SparkColumn]: columns = 
[] try: rows: AttrDict = self.execute_macro( @@ -235,12 +308,16 @@ def get_columns_in_relation(self, relation: Relation) -> List[SparkColumn]: columns = [x for x in columns if x.name not in self.HUDI_METADATA_COLUMNS] return columns - def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkColumn]: - owner_match = re.findall(self.INFORMATION_OWNER_REGEX, relation.information) + def parse_columns_from_information(self, relation: BaseRelation) -> List[SparkColumn]: + if hasattr(relation, "information"): + information = relation.information or "" + else: + information = "" + owner_match = re.findall(self.INFORMATION_OWNER_REGEX, information) owner = owner_match[0] if owner_match else None - matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, relation.information) + matches = re.finditer(self.INFORMATION_COLUMNS_REGEX, information) columns = [] - stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, relation.information) + stats_match = re.findall(self.INFORMATION_STATISTICS_REGEX, information) raw_table_stats = stats_match[0] if stats_match else None table_stats = SparkColumn.convert_table_stats(raw_table_stats) for match_num, match in enumerate(matches): @@ -259,7 +336,7 @@ def parse_columns_from_information(self, relation: SparkRelation) -> List[SparkC columns.append(column) return columns - def _get_columns_for_catalog(self, relation: SparkRelation) -> Iterable[Dict[str, Any]]: + def _get_columns_for_catalog(self, relation: BaseRelation) -> Iterable[Dict[str, Any]]: columns = self.parse_columns_from_information(relation) for column in columns: @@ -270,13 +347,7 @@ def _get_columns_for_catalog(self, relation: SparkRelation) -> Iterable[Dict[str as_dict["table_database"] = None yield as_dict - def get_properties(self, relation: Relation) -> Dict[str, str]: - properties = self.execute_macro( - FETCH_TBL_PROPERTIES_MACRO_NAME, kwargs={"relation": relation} - ) - return dict(properties) - - def get_catalog(self, manifest): + def get_catalog(self, manifest: Manifest) -> Tuple[agate.Table, List[Exception]]: schema_map = self._get_catalog_schemas(manifest) if len(schema_map) > 1: raise dbt.exceptions.CompilationError( @@ -302,9 +373,9 @@ def get_catalog(self, manifest): def _get_one_catalog( self, - information_schema, - schemas, - manifest, + information_schema: InformationSchema, + schemas: Set[str], + manifest: Manifest, ) -> agate.Table: if len(schemas) != 1: raise dbt.exceptions.CompilationError( @@ -316,11 +387,11 @@ def _get_one_catalog( columns: List[Dict[str, Any]] = [] for relation in self.list_relations(database, schema): - logger.debug("Getting table schema for relation {}", relation) + logger.debug("Getting table schema for relation {}", str(relation)) columns.extend(self._get_columns_for_catalog(relation)) return agate.Table.from_object(columns, column_types=DEFAULT_TYPE_TESTER) - def check_schema_exists(self, database, schema): + def check_schema_exists(self, database: str, schema: str) -> bool: results = self.execute_macro(LIST_SCHEMAS_MACRO_NAME, kwargs={"database": database}) exists = True if schema in [row[0] for row in results] else False @@ -333,7 +404,7 @@ def get_rows_different_sql( column_names: Optional[List[str]] = None, except_operator: str = "EXCEPT", ) -> str: - """Generate SQL for a query that returns a single row with a two + """Generate SQL for a query that returns a single row with two columns: the number of rows that are different between the two relations and the number of mismatched rows. 
""" @@ -357,7 +428,7 @@ def get_rows_different_sql( # This is for use in the test suite # Spark doesn't have 'commit' and 'rollback', so this override # doesn't include those commands. - def run_sql_for_tests(self, sql, fetch, conn): + def run_sql_for_tests(self, sql, fetch, conn): # type: ignore cursor = conn.handle.cursor() try: cursor.execute(sql) @@ -409,6 +480,10 @@ def standardize_grants_dict(self, grants_table: agate.Table) -> dict: grants_dict.update({privilege: [grantee]}) return grants_dict + def debug_query(self) -> None: + """Override for DebugTask method""" + self.execute("select 1 as id") + # spark does something interesting with joins when both tables have the same # static values for the join condition and complains that the join condition is diff --git a/dbt/adapters/spark/python_submissions.py b/dbt/adapters/spark/python_submissions.py index 47529e079..89831ca7f 100644 --- a/dbt/adapters/spark/python_submissions.py +++ b/dbt/adapters/spark/python_submissions.py @@ -1,7 +1,7 @@ import base64 import time import requests -from typing import Any, Dict +from typing import Any, Dict, Callable, Iterable import uuid import dbt.exceptions @@ -149,18 +149,18 @@ def submit(self, compiled_code: str) -> None: def polling( self, - status_func, - status_func_kwargs, - get_state_func, - terminal_states, - expected_end_state, - get_state_msg_func, + status_func: Callable, + status_func_kwargs: Dict, + get_state_func: Callable, + terminal_states: Iterable[str], + expected_end_state: str, + get_state_msg_func: Callable, ) -> Dict: state = None start = time.time() exceeded_timeout = False - response = {} - while state not in terminal_states: + response: Dict = {} + while state is None or state not in terminal_states: if time.time() - start > self.timeout: exceeded_timeout = True break diff --git a/dbt/adapters/spark/relation.py b/dbt/adapters/spark/relation.py index 0b0c58bc1..e80f2623f 100644 --- a/dbt/adapters/spark/relation.py +++ b/dbt/adapters/spark/relation.py @@ -1,9 +1,14 @@ -from typing import Optional - +from typing import Optional, TypeVar from dataclasses import dataclass, field from dbt.adapters.base.relation import BaseRelation, Policy + from dbt.exceptions import DbtRuntimeError +from dbt.events import AdapterLogger + +logger = AdapterLogger("Spark") + +Self = TypeVar("Self", bound="BaseRelation") @dataclass @@ -27,13 +32,15 @@ class SparkRelation(BaseRelation): quote_character: str = "`" is_delta: Optional[bool] = None is_hudi: Optional[bool] = None + is_iceberg: Optional[bool] = None + # TODO: make this a dict everywhere information: Optional[str] = None - def __post_init__(self): + def __post_init__(self) -> None: if self.database != self.schema and self.database: raise DbtRuntimeError("Cannot set database in spark!") - def render(self): + def render(self) -> str: if self.include_policy.database and self.include_policy.schema: raise DbtRuntimeError( "Got a spark relation with schema and database set to " diff --git a/dbt/adapters/spark/session.py b/dbt/adapters/spark/session.py index beb77d548..b5b2bebdb 100644 --- a/dbt/adapters/spark/session.py +++ b/dbt/adapters/spark/session.py @@ -4,11 +4,14 @@ import datetime as dt from types import TracebackType -from typing import Any, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple, Union, Sequence +from dbt.adapters.spark.connections import SparkConnectionWrapper from dbt.events import AdapterLogger from dbt.utils import DECIMALS +from dbt.exceptions import DbtRuntimeError from pyspark.sql import 
DataFrame, Row, SparkSession +from pyspark.sql.utils import AnalysisException logger = AdapterLogger("Spark") @@ -24,9 +27,10 @@ class Cursor: https://github.com/mkleehammer/pyodbc/wiki/Cursor """ - def __init__(self) -> None: + def __init__(self, *, server_side_parameters: Optional[Dict[str, Any]] = None) -> None: self._df: Optional[DataFrame] = None self._rows: Optional[List[Row]] = None + self.server_side_parameters = server_side_parameters or {} def __enter__(self) -> Cursor: return self @@ -43,13 +47,15 @@ def __exit__( @property def description( self, - ) -> List[Tuple[str, str, None, None, None, None, bool]]: + ) -> Sequence[ + Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool] + ]: """ Get the description. Returns ------- - out : List[Tuple[str, str, None, None, None, None, bool]] + out : Sequence[Tuple[str, str, None, None, None, None, bool]] The description. Source @@ -106,8 +112,18 @@ def execute(self, sql: str, *parameters: Any) -> None: """ if len(parameters) > 0: sql = sql % parameters - spark_session = SparkSession.builder.enableHiveSupport().getOrCreate() - self._df = spark_session.sql(sql) + + builder = SparkSession.builder.enableHiveSupport() + + for parameter, value in self.server_side_parameters.items(): + builder = builder.config(parameter, value) + + spark_session = builder.getOrCreate() + + try: + self._df = spark_session.sql(sql) + except AnalysisException as exc: + raise DbtRuntimeError(str(exc)) from exc def fetchall(self) -> Optional[List[Row]]: """ @@ -140,7 +156,7 @@ def fetchone(self) -> Optional[Row]: https://github.com/mkleehammer/pyodbc/wiki/Cursor#fetchone """ if self._rows is None and self._df is not None: - self._rows = self._df.collect() + self._rows = self._df.take(1) if self._rows is not None and len(self._rows) > 0: row = self._rows.pop(0) @@ -159,6 +175,9 @@ class Connection: https://github.com/mkleehammer/pyodbc/wiki/Connection """ + def __init__(self, *, server_side_parameters: Optional[Dict[Any, str]] = None) -> None: + self.server_side_parameters = server_side_parameters or {} + def cursor(self) -> Cursor: """ Get a cursor. @@ -168,37 +187,42 @@ def cursor(self) -> Cursor: out : Cursor The cursor. 
""" - return Cursor() + return Cursor(server_side_parameters=self.server_side_parameters) -class SessionConnectionWrapper(object): - """Connection wrapper for the sessoin connection method.""" +class SessionConnectionWrapper(SparkConnectionWrapper): + """Connection wrapper for the session connection method.""" - def __init__(self, handle): + handle: Connection + _cursor: Optional[Cursor] + + def __init__(self, handle: Connection) -> None: self.handle = handle self._cursor = None - def cursor(self): + def cursor(self) -> "SessionConnectionWrapper": self._cursor = self.handle.cursor() return self - def cancel(self): + def cancel(self) -> None: logger.debug("NotImplemented: cancel") - def close(self): + def close(self) -> None: if self._cursor: self._cursor.close() - def rollback(self, *args, **kwargs): + def rollback(self, *args: Any, **kwargs: Any) -> None: logger.debug("NotImplemented: rollback") - def fetchall(self): + def fetchall(self) -> Optional[List[Row]]: + assert self._cursor, "Cursor not available" return self._cursor.fetchall() - def execute(self, sql, bindings=None): + def execute(self, sql: str, bindings: Optional[List[Any]] = None) -> None: if sql.strip().endswith(";"): sql = sql.strip()[:-1] + assert self._cursor, "Cursor not available" if bindings is None: self._cursor.execute(sql) else: @@ -206,11 +230,16 @@ def execute(self, sql, bindings=None): self._cursor.execute(sql, *bindings) @property - def description(self): + def description( + self, + ) -> Sequence[ + Tuple[str, Any, Optional[int], Optional[int], Optional[int], Optional[int], bool] + ]: + assert self._cursor, "Cursor not available" return self._cursor.description @classmethod - def _fix_binding(cls, value): + def _fix_binding(cls, value: Any) -> Union[str, float]: """Convert complex datatypes to primitives that can be loaded by the Spark driver""" if isinstance(value, NUMBERS): diff --git a/dbt/include/spark/macros/adapters.sql b/dbt/include/spark/macros/adapters.sql index abc7a0ba3..471d1deef 100644 --- a/dbt/include/spark/macros/adapters.sql +++ b/dbt/include/spark/macros/adapters.sql @@ -1,3 +1,18 @@ +{% macro tblproperties_clause() %} + {{ return(adapter.dispatch('tblproperties_clause', 'dbt')()) }} +{%- endmacro -%} + +{% macro spark__tblproperties_clause() -%} + {%- set tblproperties = config.get('tblproperties') -%} + {%- if tblproperties is not none %} + tblproperties ( + {%- for prop in tblproperties -%} + '{{ prop }}' = '{{ tblproperties[prop] }}' {% if not loop.last %}, {% endif %} + {%- endfor %} + ) + {%- endif %} +{%- endmacro -%} + {% macro file_format_clause() %} {{ return(adapter.dispatch('file_format_clause', 'dbt')()) }} {%- endmacro -%} @@ -133,17 +148,24 @@ {%- if temporary -%} {{ create_temporary_view(relation, compiled_code) }} {%- else -%} - {% if config.get('file_format', validator=validation.any[basestring]) == 'delta' %} + {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'iceberg'] %} create or replace table {{ relation }} {% else %} create table {{ relation }} {% endif %} + {%- set contract_config = config.get('contract') -%} + {%- if contract_config.enforced -%} + {{ get_assert_columns_equivalent(compiled_code) }} + {%- set compiled_code = get_select_subquery(compiled_code) %} + {% endif %} {{ file_format_clause() }} {{ options_clause() }} + {{ tblproperties_clause() }} {{ partition_cols(label="partitioned by") }} {{ clustered_cols(label="clustered by") }} {{ location_clause() }} {{ comment_clause() }} + as {{ compiled_code }} {%- endif -%} @@ -160,9 
+182,61 @@ {%- endmacro -%} +{% macro persist_constraints(relation, model) %} + {{ return(adapter.dispatch('persist_constraints', 'dbt')(relation, model)) }} +{% endmacro %} + +{% macro spark__persist_constraints(relation, model) %} + {%- set contract_config = config.get('contract') -%} + {% if contract_config.enforced and config.get('file_format', 'delta') == 'delta' %} + {% do alter_table_add_constraints(relation, model.constraints) %} + {% do alter_column_set_constraints(relation, model.columns) %} + {% endif %} +{% endmacro %} + +{% macro alter_table_add_constraints(relation, constraints) %} + {{ return(adapter.dispatch('alter_table_add_constraints', 'dbt')(relation, constraints)) }} +{% endmacro %} + +{% macro spark__alter_table_add_constraints(relation, constraints) %} + {% for constraint in constraints %} + {% if constraint.type == 'check' and not is_incremental() %} + {%- set constraint_hash = local_md5(column_name ~ ";" ~ constraint.expression ~ ";" ~ loop.index) -%} + {% call statement() %} + alter table {{ relation }} add constraint {{ constraint.name if constraint.name else constraint_hash }} check ({{ constraint.expression }}); + {% endcall %} + {% endif %} + {% endfor %} +{% endmacro %} + +{% macro alter_column_set_constraints(relation, column_dict) %} + {{ return(adapter.dispatch('alter_column_set_constraints', 'dbt')(relation, column_dict)) }} +{% endmacro %} + +{% macro spark__alter_column_set_constraints(relation, column_dict) %} + {% for column_name in column_dict %} + {% set constraints = column_dict[column_name]['constraints'] %} + {% for constraint in constraints %} + {% if constraint.type != 'not_null' %} + {{ exceptions.warn('Invalid constraint for column ' ~ column_name ~ '. Only `not_null` is supported.') }} + {% else %} + {% set quoted_name = adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name %} + {% call statement() %} + alter table {{ relation }} change column {{ quoted_name }} set not null {{ constraint.expression or "" }}; + {% endcall %} + {% endif %} + {% endfor %} + {% endfor %} +{% endmacro %} + + {% macro spark__create_view_as(relation, sql) -%} create or replace view {{ relation }} {{ comment_clause() }} + {%- set contract_config = config.get('contract') -%} + {%- if contract_config.enforced -%} + {{ get_assert_columns_equivalent(sql) }} + {%- endif %} as {{ sql }} {% endmacro %} @@ -191,7 +265,10 @@ {% endmacro %} {% macro spark__get_columns_in_relation(relation) -%} - {{ return(adapter.get_columns_in_relation(relation)) }} + {% call statement('get_columns_in_relation', fetch_result=True) %} + describe extended {{ relation.include(schema=(schema is not none)) }} + {% endcall %} + {% do return(load_result('get_columns_in_relation').table) %} {% endmacro %} {% macro spark__list_relations_without_caching(relation) %} @@ -202,6 +279,27 @@ {% do return(load_result('list_relations_without_caching').table) %} {% endmacro %} +{% macro list_relations_show_tables_without_caching(schema_relation) %} + {#-- Spark with iceberg tables don't work with show table extended for #} + {#-- V2 iceberg tables #} + {#-- https://issues.apache.org/jira/browse/SPARK-33393 #} + {% call statement('list_relations_without_caching_show_tables', fetch_result=True) -%} + show tables in {{ schema_relation }} like '*' + {% endcall %} + + {% do return(load_result('list_relations_without_caching_show_tables').table) %} +{% endmacro %} + +{% macro describe_table_extended_without_caching(table_name) %} + {#-- Spark with iceberg tables don't work with show table 
extended for #} + {#-- V2 iceberg tables #} + {#-- https://issues.apache.org/jira/browse/SPARK-33393 #} + {% call statement('describe_table_extended_without_caching', fetch_result=True) -%} + describe extended {{ table_name }} + {% endcall %} + {% do return(load_result('describe_table_extended_without_caching').table) %} +{% endmacro %} + {% macro spark__list_schemas(database) -%} {% call statement('list_schemas', fetch_result=True, auto_begin=False) %} show databases @@ -241,14 +339,20 @@ {% endmacro %} {% macro spark__alter_column_comment(relation, column_dict) %} - {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'hudi'] %} + {% if config.get('file_format', validator=validation.any[basestring]) in ['delta', 'hudi', 'iceberg'] %} {% for column_name in column_dict %} {% set comment = column_dict[column_name]['description'] %} {% set escaped_comment = comment | replace('\'', '\\\'') %} {% set comment_query %} - alter table {{ relation }} change column - {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} - comment '{{ escaped_comment }}'; + {% if relation.is_iceberg %} + alter table {{ relation }} alter column + {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} + comment '{{ escaped_comment }}'; + {% else %} + alter table {{ relation }} change column + {{ adapter.quote(column_name) if column_dict[column_name]['quote'] else column_name }} + comment '{{ escaped_comment }}'; + {% endif %} {% endset %} {% do run_query(comment_query) %} {% endfor %} @@ -276,7 +380,13 @@ {% macro spark__alter_relation_add_remove_columns(relation, add_columns, remove_columns) %} {% if remove_columns %} - {% set platform_name = 'Delta Lake' if relation.is_delta else 'Apache Spark' %} + {% if relation.is_delta %} + {% set platform_name = 'Delta Lake' %} + {% elif relation.is_iceberg %} + {% set platform_name = 'Iceberg' %} + {% else %} + {% set platform_name = 'Apache Spark' %} + {% endif %} {{ exceptions.raise_compiler_error(platform_name + ' does not support dropping columns from tables') }} {% endif %} diff --git a/dbt/include/spark/macros/materializations/clone.sql b/dbt/include/spark/macros/materializations/clone.sql new file mode 100644 index 000000000..a5b80a5c1 --- /dev/null +++ b/dbt/include/spark/macros/materializations/clone.sql @@ -0,0 +1,76 @@ +{% macro spark__can_clone_table() %} + {{ return(True) }} +{% endmacro %} + +{% macro spark__create_or_replace_clone(this_relation, defer_relation) %} + create or replace table {{ this_relation }} shallow clone {{ defer_relation }} +{% endmacro %} + +{%- materialization clone, adapter='spark' -%} + + {%- set relations = {'relations': []} -%} + + {%- if not defer_relation -%} + -- nothing to do + {{ log("No relation found in state manifest for " ~ model.unique_id, info=True) }} + {{ return(relations) }} + {%- endif -%} + + {%- set existing_relation = load_cached_relation(this) -%} + + {%- if existing_relation and not flags.FULL_REFRESH -%} + -- noop! 
+ {{ log("Relation " ~ existing_relation ~ " already exists", info=True) }} + {{ return(relations) }} + {%- endif -%} + + {%- set other_existing_relation = load_cached_relation(defer_relation) -%} + {%- set file_format = config.get('file_format', validator=validation.any[basestring]) -%} + + -- If this is a database that can do zero-copy cloning of tables, and the other relation is a table, then this will be a table + -- Otherwise, this will be a view + + {% set can_clone_table = can_clone_table() %} + + {%- if file_format != 'delta' -%} + {% set invalid_format_msg -%} + Invalid file format: {{ file_format }} + shallow clone requires file_format be set to 'delta' + {%- endset %} + {% do exceptions.raise_compiler_error(invalid_format_msg) %} + {%- elif other_existing_relation and other_existing_relation.type == 'table' and can_clone_table -%} + + {%- set target_relation = this.incorporate(type='table') -%} + {% if existing_relation is not none and not existing_relation.is_table %} + {{ log("Dropping relation " ~ existing_relation ~ " because it is of type " ~ existing_relation.type) }} + {{ drop_relation_if_exists(existing_relation) }} + {% endif %} + + -- as a general rule, data platforms that can clone tables can also do atomic 'create or replace' + {% call statement('main') %} + {{ create_or_replace_clone(target_relation, defer_relation) }} + {% endcall %} + + {% set should_revoke = should_revoke(existing_relation, full_refresh_mode=True) %} + {% do apply_grants(target_relation, grant_config, should_revoke=should_revoke) %} + {% do persist_docs(target_relation, model) %} + + {{ return({'relations': [target_relation]}) }} + + {%- else -%} + + {%- set target_relation = this.incorporate(type='view') -%} + + -- reuse the view materialization + -- TODO: support actual dispatch for materialization macros + -- Tracking ticket: https://github.com/dbt-labs/dbt-core/issues/7799 + {% set search_name = "materialization_view_" ~ adapter.type() %} + {% if not search_name in context %} + {% set search_name = "materialization_view_default" %} + {% endif %} + {% set materialization_macro = context[search_name] %} + {% set relations = materialization_macro() %} + {{ return(relations) }} + {% endif %} + +{%- endmaterialization -%} diff --git a/dbt/include/spark/macros/materializations/incremental/incremental.sql b/dbt/include/spark/macros/materializations/incremental/incremental.sql index cc46d4c14..10d4f3ed8 100644 --- a/dbt/include/spark/macros/materializations/incremental/incremental.sql +++ b/dbt/include/spark/macros/materializations/incremental/incremental.sql @@ -39,6 +39,7 @@ {%- call statement('main', language=language) -%} {{ create_table_as(False, target_relation, compiled_code, language) }} {%- endcall -%} + {% do persist_constraints(target_relation, model) %} {%- elif existing_relation.is_view or should_full_refresh() -%} {#-- Relation must be dropped & recreated --#} {% set is_delta = (file_format == 'delta' and existing_relation.is_delta) %} @@ -48,6 +49,7 @@ {%- call statement('main', language=language) -%} {{ create_table_as(False, target_relation, compiled_code, language) }} {%- endcall -%} + {% do persist_constraints(target_relation, model) %} {%- else -%} {#-- Relation must be merged --#} {%- call statement('create_tmp_relation', language=language) -%} @@ -55,7 +57,7 @@ {%- endcall -%} {%- do process_schema_changes(on_schema_change, tmp_relation, existing_relation) -%} {%- call statement('main') -%} - {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, 
unique_key, incremental_predicates) }} + {{ dbt_spark_get_incremental_sql(strategy, tmp_relation, target_relation, existing_relation, unique_key, incremental_predicates) }} {%- endcall -%} {%- if language == 'python' -%} {#-- @@ -63,7 +65,7 @@ See note in dbt-spark/dbt/include/spark/macros/adapters.sql re: python models and temporary views. - Also, why doesn't either drop_relation or adapter.drop_relation work here?! + Also, why do neither drop_relation or adapter.drop_relation work here?! --#} {% call statement('drop_relation') -%} drop table if exists {{ tmp_relation }} diff --git a/dbt/include/spark/macros/materializations/incremental/strategies.sql b/dbt/include/spark/macros/materializations/incremental/strategies.sql index facfaadff..eeb920493 100644 --- a/dbt/include/spark/macros/materializations/incremental/strategies.sql +++ b/dbt/include/spark/macros/materializations/incremental/strategies.sql @@ -1,9 +1,15 @@ -{% macro get_insert_overwrite_sql(source_relation, target_relation) %} +{% macro get_insert_overwrite_sql(source_relation, target_relation, existing_relation) %} {%- set dest_columns = adapter.get_columns_in_relation(target_relation) -%} {%- set dest_cols_csv = dest_columns | map(attribute='quoted') | join(', ') -%} - insert overwrite table {{ target_relation }} - {{ partition_cols(label="partition") }} + {% if existing_relation.is_iceberg %} + {# removed table from statement for iceberg #} + insert overwrite {{ target_relation }} + {# removed partition_cols for iceberg as well #} + {% else %} + insert overwrite table {{ target_relation }} + {{ partition_cols(label="partition") }} + {% endif %} select {{dest_cols_csv}} from {{ source_relation }} {% endmacro %} @@ -62,15 +68,15 @@ {% endmacro %} -{% macro dbt_spark_get_incremental_sql(strategy, source, target, unique_key, incremental_predicates) %} +{% macro dbt_spark_get_incremental_sql(strategy, source, target, existing, unique_key, incremental_predicates) %} {%- if strategy == 'append' -%} {#-- insert new records into existing table, without updating or overwriting #} {{ get_insert_into_sql(source, target) }} {%- elif strategy == 'insert_overwrite' -%} {#-- insert statements don't like CTEs, so support them via a temp view #} - {{ get_insert_overwrite_sql(source, target) }} + {{ get_insert_overwrite_sql(source, target, existing) }} {%- elif strategy == 'merge' -%} - {#-- merge all columns with databricks delta - schema changes are handled for us #} + {#-- merge all columns for datasources which implement MERGE INTO (e.g. 
databricks, iceberg) - schema changes are handled for us #} {{ get_merge_sql(target, source, unique_key, dest_columns=none, incremental_predicates=incremental_predicates) }} {%- else -%} {% set no_sql_for_strategy_msg -%} diff --git a/dbt/include/spark/macros/materializations/incremental/validate.sql b/dbt/include/spark/macros/materializations/incremental/validate.sql index ffd56f106..88b851ca4 100644 --- a/dbt/include/spark/macros/materializations/incremental/validate.sql +++ b/dbt/include/spark/macros/materializations/incremental/validate.sql @@ -1,7 +1,7 @@ {% macro dbt_spark_validate_get_file_format(raw_file_format) %} {#-- Validate the file format #} - {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'libsvm', 'hudi'] %} + {% set accepted_formats = ['text', 'csv', 'json', 'jdbc', 'parquet', 'orc', 'hive', 'delta', 'iceberg', 'libsvm', 'hudi'] %} {% set invalid_file_format_msg -%} Invalid file format provided: {{ raw_file_format }} @@ -26,12 +26,12 @@ {% set invalid_merge_msg -%} Invalid incremental strategy provided: {{ raw_strategy }} - You can only choose this strategy when file_format is set to 'delta' or 'hudi' + You can only choose this strategy when file_format is set to 'delta' or 'iceberg' or 'hudi' {%- endset %} {% set invalid_insert_overwrite_delta_msg -%} Invalid incremental strategy provided: {{ raw_strategy }} - You cannot use this strategy when file_format is set to 'delta' + You cannot use this strategy when file_format is set to 'delta' or 'iceberg' Use the 'append' or 'merge' strategy instead {%- endset %} @@ -40,16 +40,12 @@ You cannot use this strategy when connecting via endpoint Use the 'append' or 'merge' strategy instead {%- endset %} - {% if raw_strategy not in ['append', 'merge', 'insert_overwrite'] %} {% do exceptions.raise_compiler_error(invalid_strategy_msg) %} {%-else %} - {% if raw_strategy == 'merge' and file_format not in ['delta', 'hudi'] %} + {% if raw_strategy == 'merge' and file_format not in ['delta', 'iceberg', 'hudi'] %} {% do exceptions.raise_compiler_error(invalid_merge_msg) %} {% endif %} - {% if raw_strategy == 'insert_overwrite' and file_format == 'delta' %} - {% do exceptions.raise_compiler_error(invalid_insert_overwrite_delta_msg) %} - {% endif %} {% if raw_strategy == 'insert_overwrite' and target.endpoint %} {% do exceptions.raise_compiler_error(invalid_insert_overwrite_endpoint_msg) %} {% endif %} diff --git a/dbt/include/spark/macros/materializations/snapshot.sql b/dbt/include/spark/macros/materializations/snapshot.sql index 6cf2358fe..a397f84e5 100644 --- a/dbt/include/spark/macros/materializations/snapshot.sql +++ b/dbt/include/spark/macros/materializations/snapshot.sql @@ -15,7 +15,12 @@ {% macro spark__snapshot_merge_sql(target, source, insert_cols) -%} merge into {{ target }} as DBT_INTERNAL_DEST - using {{ source }} as DBT_INTERNAL_SOURCE + {% if target.is_iceberg %} + {# create view only supports a name (no catalog, or schema) #} + using {{ source.identifier }} as DBT_INTERNAL_SOURCE + {% else %} + using {{ source }} as DBT_INTERNAL_SOURCE + {% endif %} on DBT_INTERNAL_SOURCE.dbt_scd_id = DBT_INTERNAL_DEST.dbt_scd_id when matched and DBT_INTERNAL_DEST.dbt_valid_to is null @@ -33,10 +38,18 @@ {% macro spark_build_snapshot_staging_table(strategy, sql, target_relation) %} {% set tmp_identifier = target_relation.identifier ~ '__dbt_tmp' %} - {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, - schema=target_relation.schema, - database=none, - type='view') -%} + {% if 
target_relation.is_iceberg %} + {# iceberg catalog does not support create view, but regular spark does. We removed the catalog and schema #} + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, + schema=none, + database=none, + type='view') -%} + {% else %} + {%- set tmp_relation = api.Relation.create(identifier=tmp_identifier, + schema=target_relation.schema, + database=none, + type='view') -%} + {% endif %} {% set select = snapshot_staging_table(strategy, sql, target_relation) %} @@ -83,25 +96,25 @@ identifier=target_table, type='table') -%} - {%- if file_format not in ['delta', 'hudi'] -%} + {%- if file_format not in ['delta', 'iceberg', 'hudi'] -%} {% set invalid_format_msg -%} Invalid file format: {{ file_format }} - Snapshot functionality requires file_format be set to 'delta' or 'hudi' + Snapshot functionality requires file_format be set to 'delta' or 'iceberg' or 'hudi' {%- endset %} {% do exceptions.raise_compiler_error(invalid_format_msg) %} {% endif %} {%- if target_relation_exists -%} - {%- if not target_relation.is_delta and not target_relation.is_hudi -%} + {%- if not target_relation.is_delta and not target_relation.is_iceberg and not target_relation.is_hudi -%} {% set invalid_format_msg -%} - The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' or 'hudi' + The existing table {{ model.schema }}.{{ target_table }} is in another format than 'delta' or 'iceberg' or 'hudi' {%- endset %} {% do exceptions.raise_compiler_error(invalid_format_msg) %} {% endif %} {% endif %} {% if not adapter.check_schema_exists(model.database, model.schema) %} - {% do create_schema(model.database, model.schema) %} + {% do create_schema(model.schema) %} {% endif %} {%- if not target_relation.is_table -%} diff --git a/dbt/include/spark/macros/materializations/table.sql b/dbt/include/spark/macros/materializations/table.sql index c82e27e9c..39a9caba9 100644 --- a/dbt/include/spark/macros/materializations/table.sql +++ b/dbt/include/spark/macros/materializations/table.sql @@ -12,14 +12,24 @@ {{ run_hooks(pre_hooks) }} -- setup: if the target relation already exists, drop it - -- in case if the existing and future table is delta, we want to do a + -- in case if the existing and future table is delta or iceberg, we want to do a -- create or replace table instead of dropping, so we don't have the table unavailable - {% if old_relation and not (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') -%} - {{ adapter.drop_relation(old_relation) }} - {%- endif %} + {% if old_relation is not none %} + {% set is_delta = (old_relation.is_delta and config.get('file_format', validator=validation.any[basestring]) == 'delta') %} + {% set is_iceberg = (old_relation.is_iceberg and config.get('file_format', validator=validation.any[basestring]) == 'iceberg') %} + {% set old_relation_type = old_relation.type %} + {% else %} + {% set is_delta = false %} + {% set is_iceberg = false %} + {% set old_relation_type = target_relation.type %} + {% endif %} + + {% if not is_delta and not is_iceberg %} + {% set existing_relation = target_relation %} + {{ adapter.drop_relation(existing_relation.incorporate(type=old_relation_type)) }} + {% endif %} -- build model - {%- call statement('main', language=language) -%} {{ create_table_as(False, target_relation, compiled_code, language) }} {%- endcall -%} @@ -29,6 +39,8 @@ {% do persist_docs(target_relation, model) %} + {% do persist_constraints(target_relation, model) %} + {{ 
run_hooks(post_hooks) }} {{ return({'relations': [target_relation]})}} diff --git a/dbt/include/spark/macros/utils/split_part.sql b/dbt/include/spark/macros/utils/split_part.sql index d5ae30924..2da033760 100644 --- a/dbt/include/spark/macros/utils/split_part.sql +++ b/dbt/include/spark/macros/utils/split_part.sql @@ -9,14 +9,34 @@ {% endset %} - {% set split_part_expr %} + {% if part_number >= 0 %} - split( - {{ string_text }}, - {{ delimiter_expr }} - )[({{ part_number - 1 }})] + {% set split_part_expr %} - {% endset %} + split( + {{ string_text }}, + {{ delimiter_expr }} + )[({{ part_number - 1 if part_number > 0 else part_number }})] + + {% endset %} + + {% else %} + + {% set split_part_expr %} + + split( + {{ string_text }}, + {{ delimiter_expr }} + )[( + length({{ string_text }}) + - length( + replace({{ string_text }}, {{ delimiter_text }}, '') + ) + 1 + {{ part_number }} + )] + + {% endset %} + + {% endif %} {{ return(split_part_expr) }} diff --git a/dev-requirements.txt b/dev-requirements.txt index 58009d04c..11369654a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -3,23 +3,32 @@ git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-core&subdirectory=core git+https://github.com/dbt-labs/dbt-core.git#egg=dbt-tests-adapter&subdirectory=tests/adapter -black==22.8.0 -bumpversion -click~=8.1.3 -flake8 -flaky -freezegun==0.3.9 -ipdb -mock>=1.3.0 -mypy==0.971 -pre-commit -pytest-csv -pytest-dotenv -pytest-xdist -pytest>=6.0.2 -pytz -tox>=3.2.0 +# if version 1.x or greater -> pin to major version +# if version 0.x -> pin to minor +black~=23.3 +bumpversion~=0.6.0 +click~=8.1 +flake8~=6.1;python_version>="3.8" +flaky~=3.7 +freezegun~=1.2 +ipdb~=0.13.13 +mypy==1.5.0 # patch updates have historically introduced breaking changes +pip-tools~=7.3 +pre-commit~=3.3 +pre-commit-hooks~=4.4 +pytest~=7.4 +pytest-csv~=3.0 +pytest-dotenv~=0.5.2 +pytest-logbook~=1.2 +pytest-xdist~=3.3 +pytz~=2023.3 +tox~=4.8 +types-pytz~=2023.3 +types-requests~=2.29 +twine~=4.0 +wheel~=0.41 -# Test requirements -sasl>=0.2.1 -thrift_sasl==0.4.3 +# Adapter specific dependencies +mock~=5.0 +sasl~=0.3.1 +thrift_sasl~=0.4.3 diff --git a/pytest.ini b/pytest.ini index b04a6ccf3..b3d74bc14 100644 --- a/pytest.ini +++ b/pytest.ini @@ -6,5 +6,4 @@ env_files = test.env testpaths = tests/unit - tests/integration tests/functional diff --git a/requirements.txt b/requirements.txt index 14b36b723..e58ecdd4b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,10 @@ PyHive[hive]>=0.6.0,<0.7.0 requests[python]>=2.28.1 -pyodbc==4.0.34 +pyodbc~=4.0.39 sqlparams>=3.0.0 thrift>=0.13.0 sqlparse>=0.4.2 # not directly required, pinned by Snyk to avoid a vulnerability + +types-PyYAML +types-python-dateutil diff --git a/setup.py b/setup.py index bfa377453..c6713e895 100644 --- a/setup.py +++ b/setup.py @@ -3,10 +3,10 @@ import sys import re -# require python 3.7 or newer -if sys.version_info < (3, 7): +# require python 3.8 or newer +if sys.version_info < (3, 8): print("Error: dbt does not support this version of Python.") - print("Please upgrade to Python 3.7 or higher.") + print("Please upgrade to Python 3.8 or higher.") sys.exit(1) @@ -33,7 +33,7 @@ def _get_plugin_version_dict(): _version_path = os.path.join(this_directory, "dbt", "adapters", "spark", "__version__.py") _semver = r"""(?P\d+)\.(?P\d+)\.(?P\d+)""" _pre = r"""((?Pa|b|rc)(?P
\d+))?"""
-    _version_pattern = fr"""version\s*=\s*["']{_semver}{_pre}["']"""
+    _version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']"""
     with open(_version_path) as f:
         match = re.search(_version_pattern, f.read().strip())
         if match is None:
@@ -50,11 +50,11 @@ def _get_dbt_core_version():
 
 
 package_name = "dbt-spark"
-package_version = "1.5.0a1"
+package_version = "1.7.0a1"
 dbt_core_version = _get_dbt_core_version()
 description = """The Apache Spark adapter plugin for dbt"""
 
-odbc_extras = ["pyodbc>=4.0.30"]
+odbc_extras = ["pyodbc~=4.0.30"]
 pyhive_extras = [
     "PyHive[hive]>=0.6.0,<0.7.0",
     "thrift>=0.11.0,<0.17.0",
@@ -90,10 +90,9 @@ def _get_dbt_core_version():
         "Operating System :: Microsoft :: Windows",
         "Operating System :: MacOS :: MacOS X",
         "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python :: 3.7",
         "Programming Language :: Python :: 3.8",
         "Programming Language :: Python :: 3.9",
         "Programming Language :: Python :: 3.10",
     ],
-    python_requires=">=3.7",
+    python_requires=">=3.8",
 )
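
Aside on the setup.py hunk above: `_get_plugin_version_dict` builds its pattern from `_semver` and `_pre`, whose named groups did not survive this rendering of the diff. Below is a minimal sketch of how that pattern behaves against the new `package_version`; the group names used here (major, minor, patch, prekind, pre) are reconstructed and should be treated as an assumption, not a quote of the file.

import re

# Reconstructed from the setup.py hunk above; the named groups are assumed.
_semver = r"""(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"""
_pre = r"""((?P<prekind>a|b|rc)(?P<pre>\d+))?"""
_version_pattern = rf"""version\s*=\s*["']{_semver}{_pre}["']"""

match = re.search(_version_pattern, 'version = "1.7.0a1"')
assert match is not None
print(match.groupdict())
# {'major': '1', 'minor': '7', 'patch': '0', 'prekind': 'a', 'pre': '1'}
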
diff --git a/tests/conftest.py b/tests/conftest.py
index 2fa50d6c7..94969e406 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -60,7 +60,7 @@ def databricks_cluster_target():
         "connect_retries": 3,
         "connect_timeout": 5,
         "retry_all": True,
-        "user": os.getenv('DBT_DATABRICKS_USER'),
+        "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
 
@@ -82,17 +82,17 @@ def databricks_sql_endpoint_target():
 def databricks_http_cluster_target():
     return {
         "type": "spark",
-        "host": os.getenv('DBT_DATABRICKS_HOST_NAME'),
-        "cluster": os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
-        "token": os.getenv('DBT_DATABRICKS_TOKEN'),
+        "host": os.getenv("DBT_DATABRICKS_HOST_NAME"),
+        "cluster": os.getenv("DBT_DATABRICKS_CLUSTER_NAME"),
+        "token": os.getenv("DBT_DATABRICKS_TOKEN"),
         "method": "http",
         "port": 443,
         # more retries + longer timeout to handle unavailability while cluster is restarting
         # return failures quickly in dev, retry all failures in CI (up to 5 min)
         "connect_retries": 5,
-        "connect_timeout": 60, 
-        "retry_all": bool(os.getenv('DBT_DATABRICKS_RETRY_ALL', False)),
-        "user": os.getenv('DBT_DATABRICKS_USER'),
+        "connect_timeout": 60,
+        "retry_all": bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)),
+        "user": os.getenv("DBT_DATABRICKS_USER"),
     }
 
 
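One note on the databricks_http_cluster_target fixture above: `bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False))` is truthy for any non-empty string, including "false". The sketch below only illustrates that Python behavior; `env_flag` is a hypothetical helper for comparison and is not part of this PR.

import os

def env_flag(name: str, default: bool = False) -> bool:
    """Hypothetical helper: treat only common truthy strings as True."""
    value = os.getenv(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")

os.environ["DBT_DATABRICKS_RETRY_ALL"] = "false"
print(bool(os.getenv("DBT_DATABRICKS_RETRY_ALL", False)))  # True: non-empty string
print(env_flag("DBT_DATABRICKS_RETRY_ALL"))                # False
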
diff --git a/tests/functional/adapter/dbt_clone/fixtures.py b/tests/functional/adapter/dbt_clone/fixtures.py
new file mode 100644
index 000000000..a4bb12a46
--- /dev/null
+++ b/tests/functional/adapter/dbt_clone/fixtures.py
@@ -0,0 +1,101 @@
+seed_csv = """id,name
+1,Alice
+2,Bob
+"""
+
+table_model_sql = """
+{{ config(materialized='table') }}
+select * from {{ ref('ephemeral_model') }}
+-- establish a macro dependency to trigger state:modified.macros
+-- depends on: {{ my_macro() }}
+"""
+
+view_model_sql = """
+{{ config(materialized='view') }}
+select * from {{ ref('seed') }}
+-- establish a macro dependency that trips infinite recursion if not handled
+-- depends on: {{ my_infinitely_recursive_macro() }}
+"""
+
+ephemeral_model_sql = """
+{{ config(materialized='ephemeral') }}
+select * from {{ ref('view_model') }}
+"""
+
+exposures_yml = """
+version: 2
+exposures:
+  - name: my_exposure
+    type: application
+    depends_on:
+      - ref('view_model')
+    owner:
+      email: test@example.com
+"""
+
+schema_yml = """
+version: 2
+models:
+  - name: view_model
+    columns:
+      - name: id
+        tests:
+          - unique:
+              severity: error
+          - not_null
+      - name: name
+"""
+
+get_schema_name_sql = """
+{% macro generate_schema_name(custom_schema_name, node) -%}
+    {%- set default_schema = target.schema -%}
+    {%- if custom_schema_name is not none -%}
+        {{ return(default_schema ~ '_' ~ custom_schema_name|trim) }}
+    -- put seeds into a separate schema in "prod", to verify that cloning in "dev" still works
+    {%- elif target.name == 'default' and node.resource_type == 'seed' -%}
+        {{ return(default_schema ~ '_' ~ 'seeds') }}
+    {%- else -%}
+        {{ return(default_schema) }}
+    {%- endif -%}
+{%- endmacro %}
+"""
+
+snapshot_sql = """
+{% snapshot my_cool_snapshot %}
+    {{
+        config(
+            target_database=database,
+            target_schema=schema,
+            unique_key='id',
+            strategy='check',
+            check_cols=['id'],
+        )
+    }}
+    select * from {{ ref('view_model') }}
+{% endsnapshot %}
+"""
+macros_sql = """
+{% macro my_macro() %}
+    {% do log('in a macro' ) %}
+{% endmacro %}
+"""
+
+infinite_macros_sql = """
+{# trigger infinite recursion if not handled #}
+{% macro my_infinitely_recursive_macro() %}
+  {{ return(adapter.dispatch('my_infinitely_recursive_macro')()) }}
+{% endmacro %}
+{% macro default__my_infinitely_recursive_macro() %}
+    {% if unmet_condition %}
+        {{ my_infinitely_recursive_macro() }}
+    {% else %}
+        {{ return('') }}
+    {% endif %}
+{% endmacro %}
+"""
+
+custom_can_clone_tables_false_macros_sql = """
+{% macro can_clone_table() %}
+    {{ return(False) }}
+{% endmacro %}
+"""
diff --git a/tests/functional/adapter/dbt_clone/test_dbt_clone.py b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
new file mode 100644
index 000000000..a5e8d70e0
--- /dev/null
+++ b/tests/functional/adapter/dbt_clone/test_dbt_clone.py
@@ -0,0 +1,80 @@
+import pytest
+from dbt.tests.adapter.dbt_clone.test_dbt_clone import BaseClonePossible
+from tests.functional.adapter.dbt_clone.fixtures import (
+    seed_csv,
+    table_model_sql,
+    view_model_sql,
+    ephemeral_model_sql,
+    exposures_yml,
+    schema_yml,
+    snapshot_sql,
+    get_schema_name_sql,
+    macros_sql,
+    infinite_macros_sql,
+)
+
+
+@pytest.mark.skip_profile("apache_spark", "spark_session")
+class TestSparkBigqueryClonePossible(BaseClonePossible):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "table_model.sql": table_model_sql,
+            "view_model.sql": view_model_sql,
+            "ephemeral_model.sql": ephemeral_model_sql,
+            "schema.yml": schema_yml,
+            "exposures.yml": exposures_yml,
+        }
+
+    @pytest.fixture(scope="class")
+    def macros(self):
+        return {
+            "macros.sql": macros_sql,
+            "infinite_macros.sql": infinite_macros_sql,
+            "get_schema_name.sql": get_schema_name_sql,
+        }
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "seed.csv": seed_csv,
+        }
+
+    @pytest.fixture(scope="class")
+    def snapshots(self):
+        return {
+            "snapshot.sql": snapshot_sql,
+        }
+
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            },
+            "seeds": {
+                "test": {
+                    "quote_columns": False,
+                },
+                "+file_format": "delta",
+            },
+            "snapshots": {
+                "+file_format": "delta",
+            },
+        }
+
+    @pytest.fixture(autouse=True)
+    def clean_up(self, project):
+        yield
+        with project.adapter.connection_named("__test"):
+            relation = project.adapter.Relation.create(
+                database=project.database, schema=f"{project.test_schema}_seeds"
+            )
+            project.adapter.drop_schema(relation)
+
+            relation = project.adapter.Relation.create(
+                database=project.database, schema=project.test_schema
+            )
+            project.adapter.drop_schema(relation)
+
+    pass
diff --git a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
index e3b07f030..7560b25ce 100644
--- a/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
+++ b/tests/functional/adapter/incremental/test_incremental_merge_exclude_columns.py
@@ -1,14 +1,12 @@
 import pytest
 
-from dbt.tests.util import run_dbt
-from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import BaseMergeExcludeColumns
+from dbt.tests.adapter.incremental.test_incremental_merge_exclude_columns import (
+    BaseMergeExcludeColumns,
+)
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestMergeExcludeColumns(BaseMergeExcludeColumns):
     @pytest.fixture(scope="class")
     def project_config_update(self):
-        return {
-            "models": {
-                "+file_format": "delta"
-            }
-        }
+        return {"models": {"+file_format": "delta"}}
diff --git a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
index 974edd261..478329668 100644
--- a/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
+++ b/tests/functional/adapter/incremental/test_incremental_on_schema_change.py
@@ -2,7 +2,9 @@
 
 from dbt.tests.util import run_dbt
 
-from dbt.tests.adapter.incremental.test_incremental_on_schema_change import BaseIncrementalOnSchemaChangeSetup
+from dbt.tests.adapter.incremental.test_incremental_on_schema_change import (
+    BaseIncrementalOnSchemaChangeSetup,
+)
 
 
 class IncrementalOnSchemaChangeIgnoreFail(BaseIncrementalOnSchemaChangeSetup):
@@ -57,20 +59,16 @@ def project_config_update(self):
 
     def run_incremental_sync_all_columns(self, project):
         select = "model_a incremental_sync_all_columns incremental_sync_all_columns_target"
-        compare_source = "incremental_sync_all_columns"
-        compare_target = "incremental_sync_all_columns_target"
         run_dbt(["run", "--models", select, "--full-refresh"])
         # Delta Lake doesn"t support removing columns -- show a nice compilation error
-        results = run_dbt(["run", "--models", select], expect_pass = False)
+        results = run_dbt(["run", "--models", select], expect_pass=False)
         assert "Compilation Error" in results[1].message
-        
+
     def run_incremental_sync_remove_only(self, project):
         select = "model_a incremental_sync_remove_only incremental_sync_remove_only_target"
-        compare_source = "incremental_sync_remove_only"
-        compare_target = "incremental_sync_remove_only_target"
         run_dbt(["run", "--models", select, "--full-refresh"])
         # Delta Lake doesn"t support removing columns -- show a nice compilation error
-        results = run_dbt(["run", "--models", select], expect_pass = False)
+        results = run_dbt(["run", "--models", select], expect_pass=False)
         assert "Compilation Error" in results[1].message
 
     def test_run_incremental_append_new_columns(self, project):
diff --git a/tests/functional/adapter/incremental/test_incremental_predicates.py b/tests/functional/adapter/incremental/test_incremental_predicates.py
index 59c6b3538..52c01a747 100644
--- a/tests/functional/adapter/incremental/test_incremental_predicates.py
+++ b/tests/functional/adapter/incremental/test_incremental_predicates.py
@@ -26,42 +26,40 @@
 {% endif %}
 """
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestIncrementalPredicatesMergeSpark(BaseIncrementalPredicates):
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            "models": { 
-                "+incremental_predicates": [
-                    "dbt_internal_dest.id != 2"
-                ],
+            "models": {
+                "+incremental_predicates": ["dbt_internal_dest.id != 2"],
                 "+incremental_strategy": "merge",
-                "+file_format": "delta"
+                "+file_format": "delta",
             }
         }
-        
+
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
         }
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestPredicatesMergeSpark(BaseIncrementalPredicates):
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            "models": { 
-                "+predicates": [
-                    "dbt_internal_dest.id != 2"
-                ],
+            "models": {
+                "+predicates": ["dbt_internal_dest.id != 2"],
                 "+incremental_strategy": "merge",
-                "+file_format": "delta"
+                "+file_format": "delta",
             }
         }
-        
+
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "delete_insert_incremental_predicates.sql": models__spark_incremental_predicates_sql
-        }
\ No newline at end of file
+        }
diff --git a/tests/functional/adapter/incremental/test_incremental_unique_id.py b/tests/functional/adapter/incremental/test_incremental_unique_id.py
index 18bac3f39..de8cb652c 100644
--- a/tests/functional/adapter/incremental/test_incremental_unique_id.py
+++ b/tests/functional/adapter/incremental/test_incremental_unique_id.py
@@ -1,7 +1,8 @@
 import pytest
 from dbt.tests.adapter.incremental.test_incremental_unique_id import BaseIncrementalUniqueKey
 
-@pytest.mark.skip_profile('spark_session', 'apache_spark')
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
 class TestUniqueKeySpark(BaseIncrementalUniqueKey):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -10,4 +11,4 @@ def project_config_update(self):
                 "+file_format": "delta",
                 "+incremental_strategy": "merge",
             }
-        }
\ No newline at end of file
+        }
diff --git a/tests/functional/adapter/incremental_strategies/fixtures.py b/tests/functional/adapter/incremental_strategies/fixtures.py
new file mode 100644
index 000000000..9cee477df
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/fixtures.py
@@ -0,0 +1,400 @@
+#
+# Models
+#
+
+default_append_sql = """
+{{ config(
+    materialized = 'incremental',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Bad Models
+#
+
+bad_file_format_sql = """
+{{ config(
+    materialized = 'incremental',
+    file_format = 'something_else',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+bad_merge_not_delta_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+bad_strategy_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'something_else',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Delta Models
+#
+
+append_delta_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'append',
+    file_format = 'delta',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+insert_overwrite_partitions_delta_sql = """
+{{ config(
+    materialized='incremental',
+    incremental_strategy='insert_overwrite',
+    partition_by='id',
+    file_format='delta'
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+"""
+
+
+delta_merge_no_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+delta_merge_unique_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+delta_merge_update_columns_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'delta',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+""".lstrip()
+
+#
+# Insert Overwrite
+#
+
+insert_overwrite_no_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'parquet',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+insert_overwrite_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    partition_by = 'id',
+    file_format = 'parquet',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+#
+# Hudi Models
+#
+
+append_hudi_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'append',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_insert_overwrite_no_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_insert_overwrite_partitions_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'insert_overwrite',
+    partition_by = 'id',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_merge_no_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_merge_unique_key_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg
+
+{% else %}
+
+select cast(2 as bigint) as id, 'yo' as msg
+union all
+select cast(3 as bigint) as id, 'anyway' as msg
+
+{% endif %}
+""".lstrip()
+
+hudi_update_columns_sql = """
+{{ config(
+    materialized = 'incremental',
+    incremental_strategy = 'merge',
+    file_format = 'hudi',
+    unique_key = 'id',
+    merge_update_columns = ['msg'],
+) }}
+
+{% if not is_incremental() %}
+
+select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
+union all
+select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
+
+{% else %}
+
+-- msg will be updated, color will be ignored
+select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
+union all
+select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
+
+{% endif %}
+""".lstrip()
diff --git a/tests/functional/adapter/incremental_strategies/seeds.py b/tests/functional/adapter/incremental_strategies/seeds.py
new file mode 100644
index 000000000..c27561e04
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/seeds.py
@@ -0,0 +1,27 @@
+expected_append_csv = """
+id,msg
+1,hello
+2,goodbye
+2,yo
+3,anyway
+""".lstrip()
+
+expected_overwrite_csv = """
+id,msg
+2,yo
+3,anyway
+""".lstrip()
+
+expected_partial_upsert_csv = """
+id,msg,color
+1,hello,blue
+2,yo,red
+3,anyway,purple
+""".lstrip()
+
+expected_upsert_csv = """
+id,msg
+1,hello
+2,yo
+3,anyway
+""".lstrip()
diff --git a/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
new file mode 100644
index 000000000..b05fcb279
--- /dev/null
+++ b/tests/functional/adapter/incremental_strategies/test_incremental_strategies.py
@@ -0,0 +1,140 @@
+import pytest
+
+from dbt.tests.util import run_dbt, check_relations_equal
+from dbt.tests.adapter.simple_seed.test_seed import SeedConfigBase
+from tests.functional.adapter.incremental_strategies.seeds import (
+    expected_append_csv,
+    expected_overwrite_csv,
+    expected_upsert_csv,
+    expected_partial_upsert_csv,
+)
+from tests.functional.adapter.incremental_strategies.fixtures import (
+    bad_file_format_sql,
+    bad_merge_not_delta_sql,
+    bad_strategy_sql,
+    default_append_sql,
+    insert_overwrite_no_partitions_sql,
+    insert_overwrite_partitions_sql,
+    append_delta_sql,
+    delta_merge_no_key_sql,
+    delta_merge_unique_key_sql,
+    delta_merge_update_columns_sql,
+    # Skip: CT-1873 insert_overwrite_partitions_delta_sql,
+)
+
+
+class BaseIncrementalStrategies(SeedConfigBase):
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {
+            "expected_append.csv": expected_append_csv,
+            "expected_overwrite.csv": expected_overwrite_csv,
+            "expected_upsert.csv": expected_upsert_csv,
+            "expected_partial_upsert.csv": expected_partial_upsert_csv,
+        }
+
+    @staticmethod
+    def seed_and_run_once():
+        run_dbt(["seed"])
+        run_dbt(["run"])
+
+    @staticmethod
+    def seed_and_run_twice():
+        run_dbt(["seed"])
+        run_dbt(["run"])
+        run_dbt(["run"])
+
+
+class TestDefaultAppend(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {"default_append.sql": default_append_sql}
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(project.adapter, ["default_append", "expected_append"])
+
+    @pytest.mark.skip_profile(
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
+    def test_default_append(self, project):
+        self.run_and_test(project)
+
+
+class TestInsertOverwrite(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "insert_overwrite_no_partitions.sql": insert_overwrite_no_partitions_sql,
+            "insert_overwrite_partitions.sql": insert_overwrite_partitions_sql,
+        }
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(
+            project.adapter, ["insert_overwrite_no_partitions", "expected_overwrite"]
+        )
+        check_relations_equal(project.adapter, ["insert_overwrite_partitions", "expected_upsert"])
+
+    @pytest.mark.skip_profile(
+        "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
+    def test_insert_overwrite(self, project):
+        self.run_and_test(project)
+
+
+class TestDeltaStrategies(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "append_delta.sql": append_delta_sql,
+            "merge_no_key.sql": delta_merge_no_key_sql,
+            "merge_unique_key.sql": delta_merge_unique_key_sql,
+            "merge_update_columns.sql": delta_merge_update_columns_sql,
+            # Skip: cannot be active on any endpoint with grants
+            # "insert_overwrite_partitions_delta.sql": insert_overwrite_partitions_delta_sql,
+        }
+
+    def run_and_test(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(project.adapter, ["append_delta", "expected_append"])
+        check_relations_equal(project.adapter, ["merge_no_key", "expected_append"])
+        check_relations_equal(project.adapter, ["merge_unique_key", "expected_upsert"])
+        check_relations_equal(project.adapter, ["merge_update_columns", "expected_partial_upsert"])
+
+    @pytest.mark.skip_profile(
+        "apache_spark", "databricks_http_cluster", "databricks_sql_endpoint", "spark_session"
+    )
+    def test_delta_strategies(self, project):
+        self.run_and_test(project)
+
+    @pytest.mark.skip(
+        reason="this feature is incompatible with databricks settings required for grants"
+    )
+    def test_delta_strategies_overwrite(self, project):
+        self.seed_and_run_twice()
+        check_relations_equal(
+            project.adapter, ["insert_overwrite_partitions_delta", "expected_upsert"]
+        )
+
+
+class TestBadStrategies(BaseIncrementalStrategies):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "bad_file_format.sql": bad_file_format_sql,
+            "bad_merge_not_delta.sql": bad_merge_not_delta_sql,
+            "bad_strategy.sql": bad_strategy_sql,
+        }
+
+    @staticmethod
+    def run_and_test():
+        run_results = run_dbt(["run"], expect_pass=False)
+        # assert all models fail with compilation errors
+        for result in run_results:
+            assert result.status == "error"
+            assert "Compilation Error in model" in result.message
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "spark_session")
+    def test_bad_strategies(self, project):
+        self.run_and_test()
diff --git a/tests/functional/adapter/persist_docs/test_persist_docs.py b/tests/functional/adapter/persist_docs/test_persist_docs.py
index 3ddab6df7..0e3d102dc 100644
--- a/tests/functional/adapter/persist_docs/test_persist_docs.py
+++ b/tests/functional/adapter/persist_docs/test_persist_docs.py
@@ -1,5 +1,3 @@
-import json
-import os
 import pytest
 
 from dbt.tests.util import run_dbt
@@ -23,61 +21,59 @@ def models(self):
             "incremental_delta_model.sql": _MODELS__INCREMENTAL_DELTA,
             "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
             "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL,
-            "schema.yml": _PROPERTIES__MODELS
+            "schema.yml": _PROPERTIES__MODELS,
         }
 
     @pytest.fixture(scope="class")
     def seeds(self):
-        return {
-            "seed.csv": _SEEDS__BASIC,
-            "seed.yml": _PROPERTIES__SEEDS
-        }
-
+        return {"seed.csv": _SEEDS__BASIC, "seed.yml": _PROPERTIES__SEEDS}
 
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
-            'models': {
-                'test': {
-                    '+persist_docs': {
+            "models": {
+                "test": {
+                    "+persist_docs": {
                         "relation": True,
                         "columns": True,
                     },
                 }
             },
-            'seeds': {
-                'test': {
-                    '+persist_docs': {
+            "seeds": {
+                "test": {
+                    "+persist_docs": {
                         "relation": True,
                         "columns": True,
                     },
-                    '+file_format': 'delta',
-                    '+quote_columns': True
+                    "+file_format": "delta",
+                    "+quote_columns": True,
                 }
             },
         }
 
     def test_delta_comments(self, project):
-        run_dbt(['seed'])
-        run_dbt(['run'])
+        run_dbt(["seed"])
+        run_dbt(["run"])
 
         for table, whatis in [
-            ('table_delta_model', 'Table'),
-            ('seed', 'Seed'),
-            ('incremental_delta_model', 'Incremental')
+            ("table_delta_model", "Table"),
+            ("seed", "Seed"),
+            ("incremental_delta_model", "Incremental"),
         ]:
             results = project.run_sql(
-                'describe extended {schema}.{table}'.format(schema=project.test_schema, table=table),
-                fetch='all'
+                "describe extended {schema}.{table}".format(
+                    schema=project.test_schema, table=table
+                ),
+                fetch="all",
             )
 
             for result in results:
-                if result[0] == 'Comment':
-                    assert result[1].startswith(f'{whatis} model description')
-                if result[0] == 'id':
-                    assert result[2].startswith('id Column description')
-                if result[0] == 'name':
-                    assert result[2].startswith('Some stuff here and then a call to')
+                if result[0] == "Comment":
+                    assert result[1].startswith(f"{whatis} model description")
+                if result[0] == "id":
+                    assert result[2].startswith("id Column description")
+                if result[0] == "name":
+                    assert result[2].startswith("Some stuff here and then a call to")
 
 
 @pytest.mark.skip_profile("apache_spark", "spark_session")
@@ -96,24 +92,21 @@ def project_config_update(self):
 
     @pytest.fixture(scope="class")
     def seeds(self):
-        return {
-            "seed.csv": _SEEDS__BASIC,
-            "seed.yml": _PROPERTIES__SEEDS
-        }
+        return {"seed.csv": _SEEDS__BASIC, "seed.yml": _PROPERTIES__SEEDS}
 
     @pytest.fixture(scope="class")
     def models(self):
         return {
             "table_delta_model.sql": _MODELS__TABLE_DELTA_MODEL_MISSING_COLUMN,
             "my_fun_docs.md": _MODELS__MY_FUN_DOCS,
-    }
+        }
 
     @pytest.fixture(scope="class")
     def properties(self):
         return {"schema.yml": _PROPERTIES__MODELS}
 
     def test_missing_column(self, project):
-        '''spark will use our schema to verify all columns exist rather than fail silently'''
+        """spark will use our schema to verify all columns exist rather than fail silently"""
         run_dbt(["seed"])
         res = run_dbt(["run"], expect_pass=False)
         assert "Missing field name in table" in res[0].message
diff --git a/tests/functional/adapter/seed_column_types/fixtures.py b/tests/functional/adapter/seed_column_types/fixtures.py
new file mode 100644
index 000000000..e002d57bb
--- /dev/null
+++ b/tests/functional/adapter/seed_column_types/fixtures.py
@@ -0,0 +1,113 @@
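+# Jinja macros and a generic `is_type` test that assert the runtime data type of each seed column.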
+_MACRO_TEST_IS_TYPE_SQL = """
+{% macro simple_type_check_column(column, check) %}
+    {% set checks = {
+        'string': column.is_string,
+        'float': column.is_float,
+        'number': column.is_number,
+        'numeric': column.is_numeric,
+        'integer': column.is_integer,
+    } %}
+
+    {{ return(checks[check]()) }}
+{% endmacro %}
+
+{% macro type_check_column(column, type_checks) %}
+    {% set failures = [] %}
+    {% for type_check in type_checks %}
+        {% if type_check.startswith('not ') %}
+            {% if simple_type_check_column(column, type_check[4:]) %}
+                {% do failures.append(type_check) %}
+            {% endif %}
+        {% else %}
+            {% if not simple_type_check_column(column, type_check) %}
+                {% do failures.append(type_check) %}
+            {% endif %}
+        {% endif %}
+    {% endfor %}
+
+    {% do return((failures | length) == 0) %}
+{% endmacro %}
+
+{% macro is_bad_column(column, column_map) %}
+    {% set column_key = (column.name | lower) %}
+    {% if column_key not in column_map %}
+        {% do exceptions.raise_compiler_error('column key ' ~ column_key ~ ' not found in ' ~ (column_map | list | string)) %}
+    {% endif %}
+
+    {% set type_checks = column_map[column_key] %}
+    {% if not type_checks %}
+        {% do exceptions.raise_compiler_error('no type checks?') %}
+    {% endif %}
+
+    {{ return(not type_check_column(column, type_checks)) }}
+{% endmacro %}
+
+{% test is_type(model, column_map) %}
+    {% if not execute %}
+        {{ return(None) }}
+    {% endif %}
+
+    {% set columns = adapter.get_columns_in_relation(model) %}
+    {% if (column_map | length) != (columns | length) %}
+        {% set column_map_keys = (column_map | list | string) %}
+        {% set column_names = (columns | map(attribute='name') | list | string) %}
+        {% do exceptions.raise_compiler_error('did not get all the columns/all columns not specified:\n' ~ column_map_keys ~ '\nvs\n' ~ column_names) %}
+    {% endif %}
+
+    {% set bad_columns = [] %}
+    {% for column in columns %}
+        {% if is_bad_column(column, column_map) %}
+            {% do bad_columns.append(column.name) %}
+        {% endif %}
+    {% endfor %}
+
+    {% set num_bad_columns = (bad_columns | length) %}
+
+    select '{{ num_bad_columns }}' as bad_column
+    group by 1
+    having bad_column > 0
+
+{% endtest %}
+""".strip()
+
+
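+# Sample payments seed loaded by the column-type test.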
+_SEED_CSV = """
+id,orderid,paymentmethod,status,amount,amount_usd,created
+1,1,credit_card,success,1000,10.00,2018-01-01
+2,2,credit_card,success,2000,20.00,2018-01-02
+3,3,coupon,success,100,1.00,2018-01-04
+4,4,coupon,success,2500,25.00,2018-01-05
+5,5,bank_transfer,fail,1700,17.00,2018-01-05
+6,5,bank_transfer,success,1700,17.00,2018-01-05
+7,6,credit_card,success,600,6.00,2018-01-07
+8,7,credit_card,success,1600,16.00,2018-01-09
+9,8,credit_card,success,2300,23.00,2018-01-11
+10,9,gift_card,success,2300,23.00,2018-01-12
+""".strip()
+
+
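+# Seed schema: explicit column_types plus an is_type test mapping each column to its expected type checks.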
+_SEED_YML = """
+version: 2
+
+seeds:
+  - name: payments
+    config:
+        column_types:
+            id: string
+            orderid: string
+            paymentmethod: string
+            status: string
+            amount: integer
+            amount_usd: decimal(20,2)
+            created: timestamp
+    tests:
+        - is_type:
+            column_map:
+                id: ["string", "not number"]
+                orderid: ["string", "not number"]
+                paymentmethod: ["string", "not number"]
+                status: ["string", "not number"]
+                amount: ["integer", "number"]
+                amount_usd: ["decimal", "number"]
+                created: ["timestamp", "string"]
+""".strip()
diff --git a/tests/functional/adapter/seed_column_types/test_seed_column_types.py b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
new file mode 100644
index 000000000..3326490f9
--- /dev/null
+++ b/tests/functional/adapter/seed_column_types/test_seed_column_types.py
@@ -0,0 +1,25 @@
+import pytest
+from dbt.tests.util import run_dbt
+from tests.functional.adapter.seed_column_types.fixtures import (
+    _MACRO_TEST_IS_TYPE_SQL,
+    _SEED_CSV,
+    _SEED_YML,
+)
+
+
+@pytest.mark.skip_profile("spark_session")
+class TestSeedColumnTypesCast:
+    @pytest.fixture(scope="class")
+    def macros(self):
+        return {"test_is_type.sql": _MACRO_TEST_IS_TYPE_SQL}
+
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"payments.csv": _SEED_CSV, "schema.yml": _SEED_YML}
+
+    # We want to test seed column types because Hive would otherwise load all fields as strings;
+    # setting column_types in the seed config should override them.
+    def test_column_seed_type(self, project):
+        results = run_dbt(["seed"])
+        assert len(results) == 1
+        run_dbt(["test"], expect_pass=False)
diff --git a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
index 86c15a55a..c445fe671 100644
--- a/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
+++ b/tests/functional/adapter/store_test_failures_tests/test_store_test_failures.py
@@ -1,31 +1,29 @@
 import pytest
 
-from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import StoreTestFailuresBase, \
-    TEST_AUDIT_SCHEMA_SUFFIX
+from dbt.tests.adapter.store_test_failures_tests.test_store_test_failures import (
+    StoreTestFailuresBase,
+    TEST_AUDIT_SCHEMA_SUFFIX,
+)
 
 
-@pytest.mark.skip_profile('spark_session', 'databricks_cluster', 'databricks_sql_endpoint')
+@pytest.mark.skip_profile("spark_session", "databricks_cluster", "databricks_sql_endpoint")
 class TestSparkStoreTestFailures(StoreTestFailuresBase):
-
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
             "seeds": {
                 "quote_columns": True,
             },
-            'tests': {
-                "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
-                '+store_failures': True
-            }
+            "tests": {"+schema": TEST_AUDIT_SCHEMA_SUFFIX, "+store_failures": True},
         }
+
     def test_store_and_assert(self, project):
         self.run_tests_store_one_failure(project)
         self.run_tests_store_failures_and_assert(project)
 
 
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSparkStoreTestFailuresWithDelta(StoreTestFailuresBase):
-
     @pytest.fixture(scope="class")
     def project_config_update(self):
         return {
@@ -34,11 +32,11 @@ def project_config_update(self):
                 "test": self.column_type_overrides(),
                 "+file_format": "delta",
             },
-            'tests': {
+            "tests": {
                 "+schema": TEST_AUDIT_SCHEMA_SUFFIX,
-                '+store_failures': True,
-                '+file_format': 'delta',
-            }
+                "+store_failures": True,
+                "+file_format": "delta",
+            },
         }
 
     def test_store_and_assert_failure_with_delta(self, project):
diff --git a/tests/functional/adapter/test_basic.py b/tests/functional/adapter/test_basic.py
index bdccf169d..072d211d6 100644
--- a/tests/functional/adapter/test_basic.py
+++ b/tests/functional/adapter/test_basic.py
@@ -14,7 +14,7 @@
 from dbt.tests.adapter.basic.test_adapter_methods import BaseAdapterMethod
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestSimpleMaterializationsSpark(BaseSimpleMaterializations):
     pass
 
@@ -25,7 +25,7 @@ class TestSingularTestsSpark(BaseSingularTests):
 
 # The local cluster currently tests on spark 2.x, which does not support this
 # if we upgrade it to 3.x, we can enable this test
-@pytest.mark.skip_profile('apache_spark')
+@pytest.mark.skip_profile("apache_spark")
 class TestSingularTestsEphemeralSpark(BaseSingularTestsEphemeral):
     pass
 
@@ -34,12 +34,12 @@ class TestEmptySpark(BaseEmpty):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestEphemeralSpark(BaseEphemeral):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestIncrementalSpark(BaseIncremental):
     pass
 
@@ -50,7 +50,7 @@ class TestGenericTestsSpark(BaseGenericTests):
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSnapshotCheckColsSpark(BaseSnapshotCheckCols):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -60,13 +60,13 @@ def project_config_update(self):
             },
             "snapshots": {
                 "+file_format": "delta",
-            }
+            },
         }
 
 
 # These tests were not enabled in the dbtspec files, so skipping here.
 # Error encountered was: Error running query: java.lang.ClassNotFoundException: delta.DefaultSource
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestSnapshotTimestampSpark(BaseSnapshotTimestamp):
     @pytest.fixture(scope="class")
     def project_config_update(self):
@@ -76,9 +76,10 @@ def project_config_update(self):
             },
             "snapshots": {
                 "+file_format": "delta",
-            }
+            },
         }
 
-@pytest.mark.skip_profile('spark_session')
+
+@pytest.mark.skip_profile("spark_session")
 class TestBaseAdapterMethod(BaseAdapterMethod):
     pass
diff --git a/tests/functional/adapter/test_constraints.py b/tests/functional/adapter/test_constraints.py
new file mode 100644
index 000000000..41b50ef81
--- /dev/null
+++ b/tests/functional/adapter/test_constraints.py
@@ -0,0 +1,385 @@
+import pytest
+from dbt.tests.adapter.constraints.test_constraints import (
+    BaseModelConstraintsRuntimeEnforcement,
+    BaseTableConstraintsColumnsEqual,
+    BaseViewConstraintsColumnsEqual,
+    BaseIncrementalConstraintsColumnsEqual,
+    BaseConstraintsRuntimeDdlEnforcement,
+    BaseConstraintsRollback,
+    BaseIncrementalConstraintsRuntimeDdlEnforcement,
+    BaseIncrementalConstraintsRollback,
+    BaseConstraintQuotedColumn,
+)
+from dbt.tests.adapter.constraints.fixtures import (
+    constrained_model_schema_yml,
+    my_model_sql,
+    my_model_wrong_order_sql,
+    my_model_wrong_name_sql,
+    model_schema_yml,
+    my_model_view_wrong_order_sql,
+    my_model_view_wrong_name_sql,
+    my_model_incremental_wrong_order_sql,
+    my_model_incremental_wrong_name_sql,
+    my_incremental_model_sql,
+    model_fk_constraint_schema_yml,
+    my_model_wrong_order_depends_on_fk_sql,
+    foreign_key_model_sql,
+    my_model_incremental_wrong_order_depends_on_fk_sql,
+    my_model_with_quoted_column_name_sql,
+    model_quoted_column_schema_yml,
+)
+
+# constraints are enforced via 'alter' statements that run after table creation
+_expected_sql_spark = """
+create or replace table <model_identifier>
+    using delta
+    as
+select
+  id,
+  color,
+  date_day
+from
+
+(
+    -- depends_on: <foreign_key_model_identifier>
+    select
+    'blue' as color,
+    1 as id,
+    '2019-01-01' as date_day ) as model_subq
+"""
+
+_expected_sql_spark_model_constraints = """
+create or replace table <model_identifier>
+    using delta
+    as
+select
+  id,
+  color,
+  date_day
+from
+
+(
+    -- depends_on: <foreign_key_model_identifier>
+    select
+    'blue' as color,
+    1 as id,
+    '2019-01-01' as date_day ) as model_subq
+"""
+
+# Different on Spark:
+# - does not support a data type named 'text' (TODO: handle this in the base test classes using string_type)
+constraints_yml = model_schema_yml.replace("text", "string").replace("primary key", "")
+model_fk_constraint_schema_yml = model_fk_constraint_schema_yml.replace("text", "string").replace(
+    "primary key", ""
+)
+model_constraints_yml = constrained_model_schema_yml.replace("text", "string")
+
+
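+# Fixture mix-in for the pyodbc-based profiles: Delta file format plus the type names expected in column-mismatch errors.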
+class PyodbcSetup:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
+    @pytest.fixture
+    def string_type(self):
+        return "STR"
+
+    @pytest.fixture
+    def int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def schema_string_type(self):
+        return "STRING"
+
+    @pytest.fixture
+    def schema_int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def data_types(self, int_type, schema_int_type, string_type, schema_string_type):
+        # sql_column_value, schema_data_type, error_data_type
+        return [
+            ["1", schema_int_type, int_type],
+            ['"1"', schema_string_type, string_type],
+            ["true", "boolean", "BOOL"],
+            ['array("1","2","3")', "string", string_type],
+            ["array(1,2,3)", "string", string_type],
+            ["6.45", "decimal", "DECIMAL"],
+            ["cast('2019-01-01' as date)", "date", "DATE"],
+            ["cast('2019-01-01' as timestamp)", "timestamp", "DATETIME"],
+        ]
+
+
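+# Fixture mix-in for the Databricks HTTP cluster profile, which reports thrift-style type names (STRING_TYPE, INT_TYPE, ...).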
+class DatabricksHTTPSetup:
+    @pytest.fixture
+    def string_type(self):
+        return "STRING_TYPE"
+
+    @pytest.fixture
+    def int_type(self):
+        return "INT_TYPE"
+
+    @pytest.fixture
+    def schema_string_type(self):
+        return "STRING"
+
+    @pytest.fixture
+    def schema_int_type(self):
+        return "INT"
+
+    @pytest.fixture
+    def data_types(self, int_type, schema_int_type, string_type, schema_string_type):
+        # sql_column_value, schema_data_type, error_data_type
+        return [
+            ["1", schema_int_type, int_type],
+            ['"1"', schema_string_type, string_type],
+            ["true", "boolean", "BOOLEAN_TYPE"],
+            ['array("1","2","3")', "array", "ARRAY_TYPE"],
+            ["array(1,2,3)", "array", "ARRAY_TYPE"],
+            ["cast('2019-01-01' as date)", "date", "DATE_TYPE"],
+            ["cast('2019-01-01' as timestamp)", "timestamp", "TIMESTAMP_TYPE"],
+            ["cast(1.0 AS DECIMAL(4, 2))", "decimal", "DECIMAL_TYPE"],
+        ]
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkTableConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseTableConstraintsColumnsEqual):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkViewConstraintsColumnsEqualPyodbc(PyodbcSetup, BaseViewConstraintsColumnsEqual):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_view_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_view_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkIncrementalConstraintsColumnsEqualPyodbc(
+    PyodbcSetup, BaseIncrementalConstraintsColumnsEqual
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkTableConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseTableConstraintsColumnsEqual
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkViewConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseViewConstraintsColumnsEqual
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_view_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_view_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+@pytest.mark.skip_profile(
+    "spark_session", "apache_spark", "databricks_sql_endpoint", "databricks_cluster"
+)
+class TestSparkIncrementalConstraintsColumnsEqualDatabricksHTTP(
+    DatabricksHTTPSetup, BaseIncrementalConstraintsColumnsEqual
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model_wrong_order.sql": my_model_incremental_wrong_order_sql,
+            "my_model_wrong_name.sql": my_model_incremental_wrong_name_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+class BaseSparkConstraintsDdlEnforcementSetup:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return _expected_sql_spark
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkTableConstraintsDdlEnforcement(
+    BaseSparkConstraintsDdlEnforcementSetup, BaseConstraintsRuntimeDdlEnforcement
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkIncrementalConstraintsDdlEnforcement(
+    BaseSparkConstraintsDdlEnforcementSetup, BaseIncrementalConstraintsRuntimeDdlEnforcement
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_incremental_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
+        }
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark", "databricks_http_cluster")
+class TestSparkConstraintQuotedColumn(PyodbcSetup, BaseConstraintQuotedColumn):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_with_quoted_column_name_sql,
+            "constraints_schema.yml": model_quoted_column_schema_yml.replace(
+                "text", "string"
+            ).replace('"from"', "`from`"),
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return """
+create or replace table <model_identifier>
+    using delta
+    as
+select
+  id,
+  `from`,
+  date_day
+from
+
+(
+    select
+    'blue' as `from`,
+    1 as id,
+    '2019-01-01' as date_day ) as model_subq
+"""
+
+
+class BaseSparkConstraintsRollbackSetup:
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_error_messages(self):
+        return [
+            "violate the new CHECK constraint",
+            "DELTA_NEW_CHECK_CONSTRAINT_VIOLATION",
+            "DELTA_NEW_NOT_NULL_VIOLATION",
+            "violate the new NOT NULL constraint",
+            "(id > 0) violated by row with values:",  # incremental mats
+            "DELTA_VIOLATE_CONSTRAINT_WITH_VALUES",  # incremental mats
+            "NOT NULL constraint violated for column",
+        ]
+
+    def assert_expected_error_messages(self, error_message, expected_error_messages):
+        # This needs to be ANY instead of ALL
+        # The CHECK constraint is added before the NOT NULL constraint
+        # and different connection types display/truncate the error message in different ways...
+        assert any(msg in error_message for msg in expected_error_messages)
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkTableConstraintsRollback(
+    BaseSparkConstraintsRollbackSetup, BaseConstraintsRollback
+):
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+    # On Spark/Databricks, constraints are applied *after* the table is replaced.
+    # We don't have any way to "rollback" the table to its previous happy state.
+    # So the 'color' column will be updated to 'red', instead of 'blue'.
+    @pytest.fixture(scope="class")
+    def expected_color(self):
+        return "red"
+
+
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkIncrementalConstraintsRollback(
+    BaseSparkConstraintsRollbackSetup, BaseIncrementalConstraintsRollback
+):
+    # color stays blue for incremental models since it's a new row that just
+    # doesn't get inserted
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_incremental_model_sql,
+            "constraints_schema.yml": constraints_yml,
+        }
+
+
+# TODO: Like the tests above, this does test that model-level constraints don't
+# result in errors, but it does not verify that they are actually present in
+# Spark and that the ALTER TABLE statement actually ran.
+@pytest.mark.skip_profile("spark_session", "apache_spark")
+class TestSparkModelConstraintsRuntimeEnforcement(BaseModelConstraintsRuntimeEnforcement):
+    @pytest.fixture(scope="class")
+    def project_config_update(self):
+        return {
+            "models": {
+                "+file_format": "delta",
+            }
+        }
+
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "my_model.sql": my_model_wrong_order_depends_on_fk_sql,
+            "foreign_key_model.sql": foreign_key_model_sql,
+            "constraints_schema.yml": model_fk_constraint_schema_yml,
+        }
+
+    @pytest.fixture(scope="class")
+    def expected_sql(self):
+        return _expected_sql_spark_model_constraints
diff --git a/tests/functional/adapter/test_get_columns_in_relation.py b/tests/functional/adapter/test_get_columns_in_relation.py
new file mode 100644
index 000000000..a037bb1ca
--- /dev/null
+++ b/tests/functional/adapter/test_get_columns_in_relation.py
@@ -0,0 +1,33 @@
+import pytest
+
+from dbt.tests.util import run_dbt, relation_from_name, check_relations_equal_with_relations
+
+
+_MODEL_CHILD = "select 1"
+
+
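+# The parent model introspects the child's columns via adapter.get_columns_in_relation and selects each one, quoted.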
+_MODEL_PARENT = """
+{% set cols = adapter.get_columns_in_relation(ref('child')) %}
+
+select
+    {% for col in cols %}
+        {{ adapter.quote(col.column) }}{%- if not loop.last %},{{ '\n ' }}{% endif %}
+    {% endfor %}
+from {{ ref('child') }}
+"""
+
+
+class TestColumnsInRelation:
+    @pytest.fixture(scope="class")
+    def models(self):
+        return {
+            "child.sql": _MODEL_CHILD,
+            "parent.sql": _MODEL_PARENT,
+        }
+
+    @pytest.mark.skip_profile("databricks_http_cluster", "spark_session")
+    def test_get_columns_in_relation(self, project):
+        run_dbt(["run"])
+        child = relation_from_name(project.adapter, "child")
+        parent = relation_from_name(project.adapter, "parent")
+        check_relations_equal_with_relations(project.adapter, [child, parent])
diff --git a/tests/functional/adapter/test_grants.py b/tests/functional/adapter/test_grants.py
index 8e0341df6..1b1a005ad 100644
--- a/tests/functional/adapter/test_grants.py
+++ b/tests/functional/adapter/test_grants.py
@@ -55,6 +55,6 @@ def project_config_update(self):
 class TestInvalidGrantsSpark(BaseInvalidGrants):
     def grantee_does_not_exist_error(self):
         return "RESOURCE_DOES_NOT_EXIST"
-        
+
     def privilege_does_not_exist_error(self):
         return "Action Unknown"
diff --git a/tests/functional/adapter/test_python_model.py b/tests/functional/adapter/test_python_model.py
index e5e7debe0..1195cbd3e 100644
--- a/tests/functional/adapter/test_python_model.py
+++ b/tests/functional/adapter/test_python_model.py
@@ -1,16 +1,23 @@
 import os
 import pytest
-from dbt.tests.util import run_dbt, write_file, run_dbt_and_capture
-from dbt.tests.adapter.python_model.test_python_model import BasePythonModelTests, BasePythonIncrementalTests
+from dbt.tests.util import run_dbt, write_file
+from dbt.tests.adapter.python_model.test_python_model import (
+    BasePythonModelTests,
+    BasePythonIncrementalTests,
+)
 from dbt.tests.adapter.python_model.test_spark import BasePySparkTests
+
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonModelSpark(BasePythonModelTests):
     pass
 
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPySpark(BasePySparkTests):
     pass
 
+
 @pytest.mark.skip_profile("apache_spark", "spark_session", "databricks_sql_endpoint")
 class TestPythonIncrementalModelSpark(BasePythonIncrementalTests):
     @pytest.fixture(scope="class")
diff --git a/tests/functional/adapter/utils/test_data_types.py b/tests/functional/adapter/utils/test_data_types.py
index ce6085803..8ca38ab1e 100644
--- a/tests/functional/adapter/utils/test_data_types.py
+++ b/tests/functional/adapter/utils/test_data_types.py
@@ -1,10 +1,12 @@
 import pytest
 from dbt.tests.adapter.utils.data_types.test_type_bigint import BaseTypeBigInt
 from dbt.tests.adapter.utils.data_types.test_type_float import (
-    BaseTypeFloat, seeds__expected_csv as seeds__float_expected_csv
+    BaseTypeFloat,
+    seeds__expected_csv as seeds__float_expected_csv,
 )
 from dbt.tests.adapter.utils.data_types.test_type_int import (
-    BaseTypeInt, seeds__expected_csv as seeds__int_expected_csv
+    BaseTypeInt,
+    seeds__expected_csv as seeds__int_expected_csv,
 )
 from dbt.tests.adapter.utils.data_types.test_type_numeric import BaseTypeNumeric
 from dbt.tests.adapter.utils.data_types.test_type_string import BaseTypeString
@@ -27,6 +29,7 @@ class TestTypeBigInt(BaseTypeBigInt):
         float_col: float
 """
 
+
 class TestTypeFloat(BaseTypeFloat):
     @pytest.fixture(scope="class")
     def seeds(self):
@@ -46,6 +49,7 @@ def seeds(self):
         int_col: int
 """
 
+
 class TestTypeInt(BaseTypeInt):
     @pytest.fixture(scope="class")
     def seeds(self):
@@ -54,16 +58,16 @@ def seeds(self):
             "expected.yml": seeds__int_expected_yml,
         }
 
-    
+
 class TestTypeNumeric(BaseTypeNumeric):
     def numeric_fixture_type(self):
         return "decimal(28,6)"
 
-    
+
 class TestTypeString(BaseTypeString):
     pass
 
-    
+
 class TestTypeTimestamp(BaseTypeTimestamp):
     pass
 
diff --git a/tests/functional/adapter/utils/test_timestamps.py b/tests/functional/adapter/utils/test_timestamps.py
index 8507c0a6b..d05d23997 100644
--- a/tests/functional/adapter/utils/test_timestamps.py
+++ b/tests/functional/adapter/utils/test_timestamps.py
@@ -5,13 +5,13 @@
 class TestCurrentTimestampSpark(BaseCurrentTimestamps):
     @pytest.fixture(scope="class")
     def models(self):
-        return {"get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"}
+        return {
+            "get_current_timestamp.sql": "select {{ current_timestamp() }} as current_timestamp"
+        }
 
     @pytest.fixture(scope="class")
     def expected_schema(self):
-        return {
-            "current_timestamp": "timestamp"
-        }
+        return {"current_timestamp": "timestamp"}
 
     @pytest.fixture(scope="class")
     def expected_sql(self):
diff --git a/tests/functional/adapter/utils/test_utils.py b/tests/functional/adapter/utils/test_utils.py
index 6fd6a9121..0dc526564 100644
--- a/tests/functional/adapter/utils/test_utils.py
+++ b/tests/functional/adapter/utils/test_utils.py
@@ -11,7 +11,6 @@
 from dbt.tests.adapter.utils.test_dateadd import BaseDateAdd
 from dbt.tests.adapter.utils.test_datediff import BaseDateDiff
 from dbt.tests.adapter.utils.test_date_trunc import BaseDateTrunc
-from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesQuote
 from dbt.tests.adapter.utils.test_escape_single_quotes import BaseEscapeSingleQuotesBackslash
 from dbt.tests.adapter.utils.test_except import BaseExcept
 from dbt.tests.adapter.utils.test_hash import BaseHash
@@ -22,6 +21,7 @@
 from dbt.tests.adapter.utils.test_replace import BaseReplace
 from dbt.tests.adapter.utils.test_right import BaseRight
 from dbt.tests.adapter.utils.test_safe_cast import BaseSafeCast
+
 from dbt.tests.adapter.utils.test_split_part import BaseSplitPart
 from dbt.tests.adapter.utils.test_string_literal import BaseStringLiteral
 
@@ -30,6 +30,21 @@
 from dbt.tests.adapter.utils.fixture_listagg import models__test_listagg_yml
 from tests.functional.adapter.utils.fixture_listagg import models__test_listagg_no_order_by_sql
 
+seeds__data_split_part_csv = """parts,split_on,result_1,result_2,result_3,result_4
+a|b|c,|,a,b,c,c
+1|2|3,|,1,2,3,3
+EMPTY|EMPTY|EMPTY,|,EMPTY,EMPTY,EMPTY,EMPTY
+"""
+
+seeds__data_last_day_csv = """date_day,date_part,result
+2018-01-02,month,2018-01-31
+2018-01-02,quarter,2018-03-31
+2018-01-02,year,2018-12-31
+"""
+
+
+# skipped the base fixture's null-input row: ,month,
+
 
 class TestAnyValue(BaseAnyValue):
     pass
@@ -55,7 +70,7 @@ class TestCastBoolToText(BaseCastBoolToText):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestConcat(BaseConcat):
     pass
 
@@ -70,7 +85,7 @@ class TestDateAdd(BaseDateAdd):
 
 
 # this generates too much SQL to run successfully in our testing environments :(
-@pytest.mark.skip_profile('apache_spark', 'spark_session')
+@pytest.mark.skip_profile("apache_spark", "spark_session")
 class TestDateDiff(BaseDateDiff):
     pass
 
@@ -87,7 +102,7 @@ class TestExcept(BaseExcept):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestHash(BaseHash):
     pass
 
@@ -96,8 +111,11 @@ class TestIntersect(BaseIntersect):
     pass
 
 
+@pytest.mark.skip_profile("spark_session")  # spark session crashes in CI
 class TestLastDay(BaseLastDay):
-    pass
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"data_last_day.csv": seeds__data_last_day_csv}
 
 
 class TestLength(BaseLength):
@@ -121,12 +139,12 @@ class TestPosition(BasePosition):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestReplace(BaseReplace):
     pass
 
 
-@pytest.mark.skip_profile('spark_session')
+@pytest.mark.skip_profile("spark_session")
 class TestRight(BaseRight):
     pass
 
@@ -136,7 +154,9 @@ class TestSafeCast(BaseSafeCast):
 
 
 class TestSplitPart(BaseSplitPart):
-    pass
+    @pytest.fixture(scope="class")
+    def seeds(self):
+        return {"data_split_part.csv": seeds__data_split_part_csv}
 
 
 class TestStringLiteral(BaseStringLiteral):
diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/integration/base.py b/tests/integration/base.py
deleted file mode 100644
index 7e557217f..000000000
--- a/tests/integration/base.py
+++ /dev/null
@@ -1,965 +0,0 @@
-import json
-import os
-import io
-import random
-import shutil
-import sys
-import tempfile
-import traceback
-import unittest
-from contextlib import contextmanager
-from datetime import datetime
-from functools import wraps
-
-import pyodbc
-import pytest
-import yaml
-from unittest.mock import patch
-
-import dbt.main as dbt
-from dbt import flags
-from dbt.deprecations import reset_deprecations
-from dbt.adapters.factory import get_adapter, reset_adapters, register_adapter
-from dbt.clients.jinja import template_cache
-from dbt.config import RuntimeConfig
-from dbt.context import providers
-from dbt.logger import log_manager
-from dbt.events.functions import (
-    capture_stdout_logs, fire_event, setup_event_logger, stop_capture_stdout_logs
-)
-from dbt.events import AdapterLogger
-from dbt.contracts.graph.manifest import Manifest
-
-logger = AdapterLogger("Spark")
-
-INITIAL_ROOT = os.getcwd()
-
-
-def normalize(path):
-    """On windows, neither is enough on its own:
-
-    >>> normcase('C:\\documents/ALL CAPS/subdir\\..')
-    'c:\\documents\\all caps\\subdir\\..'
-    >>> normpath('C:\\documents/ALL CAPS/subdir\\..')
-    'C:\\documents\\ALL CAPS'
-    >>> normpath(normcase('C:\\documents/ALL CAPS/subdir\\..'))
-    'c:\\documents\\all caps'
-    """
-    return os.path.normcase(os.path.normpath(path))
-
-
-class Normalized:
-    def __init__(self, value):
-        self.value = value
-
-    def __repr__(self):
-        return f'Normalized({self.value!r})'
-
-    def __str__(self):
-        return f'Normalized({self.value!s})'
-
-    def __eq__(self, other):
-        return normalize(self.value) == normalize(other)
-
-
-class FakeArgs:
-    def __init__(self):
-        self.threads = 1
-        self.defer = False
-        self.full_refresh = False
-        self.models = None
-        self.select = None
-        self.exclude = None
-        self.single_threaded = False
-        self.selector_name = None
-        self.state = None
-        self.defer = None
-
-
-class TestArgs:
-    __test__ = False
-
-    def __init__(self, kwargs):
-        self.which = 'run'
-        self.single_threaded = False
-        self.profiles_dir = None
-        self.project_dir = None
-        self.__dict__.update(kwargs)
-
-
-def _profile_from_test_name(test_name):
-    adapter_names = ('apache_spark', 'databricks_cluster',
-                     'databricks_sql_endpoint')
-    adapters_in_name = sum(x in test_name for x in adapter_names)
-    if adapters_in_name != 1:
-        raise ValueError(
-            'test names must have exactly 1 profile choice embedded, {} has {}'
-            .format(test_name, adapters_in_name)
-        )
-
-    for adapter_name in adapter_names:
-        if adapter_name in test_name:
-            return adapter_name
-
-    raise ValueError(
-        'could not find adapter name in test name {}'.format(test_name)
-    )
-
-
-def _pytest_test_name():
-    return os.environ['PYTEST_CURRENT_TEST'].split()[0]
-
-
-def _pytest_get_test_root():
-    test_path = _pytest_test_name().split('::')[0]
-    relative_to = INITIAL_ROOT
-    head = os.path.relpath(test_path, relative_to)
-
-    path_parts = []
-    while head:
-        head, tail = os.path.split(head)
-        path_parts.append(tail)
-    path_parts.reverse()
-    # dbt tests are all of the form 'tests/integration/suite_name'
-    target = os.path.join(*path_parts[:3])  # TODO: try to not hard code this
-    return os.path.join(relative_to, target)
-
-
-def _really_makedirs(path):
-    while not os.path.exists(path):
-        try:
-            os.makedirs(path)
-        except EnvironmentError:
-            raise
-
-
-class DBTIntegrationTest(unittest.TestCase):
-    CREATE_SCHEMA_STATEMENT = 'CREATE SCHEMA {}'
-    DROP_SCHEMA_STATEMENT = 'DROP SCHEMA IF EXISTS {} CASCADE'
-
-    _randint = random.randint(0, 9999)
-    _runtime_timedelta = (datetime.utcnow() - datetime(1970, 1, 1, 0, 0, 0))
-    _runtime = (
-        (int(_runtime_timedelta.total_seconds() * 1e6)) +
-        _runtime_timedelta.microseconds
-    )
-
-    prefix = f'test{_runtime}{_randint:04}'
-    setup_alternate_db = False
-
-    def apache_spark_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'thrift': {
-                        'type': 'spark',
-                        'host': 'localhost',
-                        'user': 'dbt',
-                        'method': 'thrift',
-                        'port': 10000,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'thrift'
-            }
-        }
-
-    def databricks_cluster_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'cluster': {
-                        'type': 'spark',
-                        'method': 'odbc',
-                        'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
-                        'cluster': os.getenv('DBT_DATABRICKS_CLUSTER_NAME'),
-                        'token': os.getenv('DBT_DATABRICKS_TOKEN'),
-                        'driver': os.getenv('ODBC_DRIVER'),
-                        'port': 443,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'cluster'
-            }
-        }
-
-    def databricks_sql_endpoint_profile(self):
-        return {
-            'config': {
-                'send_anonymous_usage_stats': False
-            },
-            'test': {
-                'outputs': {
-                    'endpoint': {
-                        'type': 'spark',
-                        'method': 'odbc',
-                        'host': os.getenv('DBT_DATABRICKS_HOST_NAME'),
-                        'endpoint': os.getenv('DBT_DATABRICKS_ENDPOINT'),
-                        'token': os.getenv('DBT_DATABRICKS_TOKEN'),
-                        'driver': os.getenv('ODBC_DRIVER'),
-                        'port': 443,
-                        'connect_retries': 3,
-                        'connect_timeout': 5,
-                        'retry_all': True,
-                        'schema': self.unique_schema()
-                    },
-                },
-                'target': 'endpoint'
-            }
-        }
-
-    @property
-    def packages_config(self):
-        return None
-
-    @property
-    def selectors_config(self):
-        return None
-
-    def unique_schema(self):
-        schema = self.schema
-
-        to_return = "{}_{}".format(self.prefix, schema)
-
-        return to_return.lower()
-
-    @property
-    def default_database(self):
-        database = self.config.credentials.database
-        return database
-
-    @property
-    def alternative_database(self):
-        return None
-
-    def get_profile(self, adapter_type):
-        if adapter_type == 'apache_spark':
-            return self.apache_spark_profile()
-        elif adapter_type == 'databricks_cluster':
-            return self.databricks_cluster_profile()
-        elif adapter_type == 'databricks_sql_endpoint':
-            return self.databricks_sql_endpoint_profile()
-        else:
-            raise ValueError('invalid adapter type {}'.format(adapter_type))
-
-    def _pick_profile(self):
-        test_name = self.id().split('.')[-1]
-        return _profile_from_test_name(test_name)
-
-    def _symlink_test_folders(self):
-        for entry in os.listdir(self.test_original_source_path):
-            src = os.path.join(self.test_original_source_path, entry)
-            tst = os.path.join(self.test_root_dir, entry)
-            if os.path.isdir(src) or src.endswith('.sql'):
-                # symlink all sql files and all directories.
-                os.symlink(src, tst)
-        os.symlink(self._logs_dir, os.path.join(self.test_root_dir, 'logs'))
-
-    @property
-    def test_root_realpath(self):
-        if sys.platform == 'darwin':
-            return os.path.realpath(self.test_root_dir)
-        else:
-            return self.test_root_dir
-
-    def _generate_test_root_dir(self):
-        return normalize(tempfile.mkdtemp(prefix='dbt-int-test-'))
-
-    def setUp(self):
-        self.dbt_core_install_root = os.path.dirname(dbt.__file__)
-        log_manager.reset_handlers()
-        self.initial_dir = INITIAL_ROOT
-        os.chdir(self.initial_dir)
-        # before we go anywhere, collect the initial path info
-        self._logs_dir = os.path.join(self.initial_dir, 'logs', self.prefix)
-        setup_event_logger(self._logs_dir)
-        _really_makedirs(self._logs_dir)
-        self.test_original_source_path = _pytest_get_test_root()
-        self.test_root_dir = self._generate_test_root_dir()
-
-        os.chdir(self.test_root_dir)
-        try:
-            self._symlink_test_folders()
-        except Exception as exc:
-            msg = '\n\t'.join((
-                'Failed to symlink test folders!',
-                'initial_dir={0.initial_dir}',
-                'test_original_source_path={0.test_original_source_path}',
-                'test_root_dir={0.test_root_dir}'
-            )).format(self)
-            logger.exception(msg)
-
-            # if logging isn't set up, I still really want this message.
-            print(msg)
-            traceback.print_exc()
-
-            raise
-
-        self._created_schemas = set()
-        reset_deprecations()
-        template_cache.clear()
-
-        self.use_profile(self._pick_profile())
-        self.use_default_project()
-        self.set_packages()
-        self.set_selectors()
-        self.load_config()
-
-    def use_default_project(self, overrides=None):
-        # create a dbt_project.yml
-        base_project_config = {
-            'name': 'test',
-            'version': '1.0',
-            'config-version': 2,
-            'test-paths': [],
-            'source-paths': [self.models],
-            'profile': 'test',
-        }
-
-        project_config = {}
-        project_config.update(base_project_config)
-        project_config.update(self.project_config)
-        project_config.update(overrides or {})
-
-        with open("dbt_project.yml", 'w') as f:
-            yaml.safe_dump(project_config, f, default_flow_style=True)
-
-    def use_profile(self, adapter_type):
-        self.adapter_type = adapter_type
-
-        profile_config = {}
-        default_profile_config = self.get_profile(adapter_type)
-
-        profile_config.update(default_profile_config)
-        profile_config.update(self.profile_config)
-
-        if not os.path.exists(self.test_root_dir):
-            os.makedirs(self.test_root_dir)
-
-        flags.PROFILES_DIR = self.test_root_dir
-        profiles_path = os.path.join(self.test_root_dir, 'profiles.yml')
-        with open(profiles_path, 'w') as f:
-            yaml.safe_dump(profile_config, f, default_flow_style=True)
-        self._profile_config = profile_config
-
-    def set_packages(self):
-        if self.packages_config is not None:
-            with open('packages.yml', 'w') as f:
-                yaml.safe_dump(self.packages_config, f, default_flow_style=True)
-
-    def set_selectors(self):
-        if self.selectors_config is not None:
-            with open('selectors.yml', 'w') as f:
-                yaml.safe_dump(self.selectors_config, f, default_flow_style=True)
-
-    def load_config(self):
-        # we've written our profile and project. Now we want to instantiate a
-        # fresh adapter for the tests.
-        # it's important to use a different connection handle here so
-        # we don't look into an incomplete transaction
-        kwargs = {
-            'profile': None,
-            'profiles_dir': self.test_root_dir,
-            'target': None,
-        }
-
-        config = RuntimeConfig.from_args(TestArgs(kwargs))
-
-        register_adapter(config)
-        adapter = get_adapter(config)
-        adapter.cleanup_connections()
-        self.adapter_type = adapter.type()
-        self.adapter = adapter
-        self.config = config
-
-        self._drop_schemas()
-        self._create_schemas()
-
-    def quote_as_configured(self, value, quote_key):
-        return self.adapter.quote_as_configured(value, quote_key)
-
-    def tearDown(self):
-        # get any current run adapter and clean up its connections before we
-        # reset them. It'll probably be different from ours because
-        # handle_and_check() calls reset_adapters().
-        register_adapter(self.config)
-        adapter = get_adapter(self.config)
-        if adapter is not self.adapter:
-            adapter.cleanup_connections()
-        if not hasattr(self, 'adapter'):
-            self.adapter = adapter
-
-        self._drop_schemas()
-
-        self.adapter.cleanup_connections()
-        reset_adapters()
-        os.chdir(INITIAL_ROOT)
-        try:
-            shutil.rmtree(self.test_root_dir)
-        except EnvironmentError:
-            logger.exception('Could not clean up after test - {} not removable'
-                             .format(self.test_root_dir))
-
-    def _get_schema_fqn(self, database, schema):
-        schema_fqn = self.quote_as_configured(schema, 'schema')
-        return schema_fqn
-
-    def _create_schema_named(self, database, schema):
-        self.run_sql('CREATE SCHEMA {schema}')
-
-    def _drop_schema_named(self, database, schema):
-        self.run_sql('DROP SCHEMA IF EXISTS {schema} CASCADE')
-
-    def _create_schemas(self):
-        schema = self.unique_schema()
-        with self.adapter.connection_named('__test'):
-            self._create_schema_named(self.default_database, schema)
-
-    def _drop_schemas(self):
-        with self.adapter.connection_named('__test'):
-            schema = self.unique_schema()
-            self._drop_schema_named(self.default_database, schema)
-            if self.setup_alternate_db and self.alternative_database:
-                self._drop_schema_named(self.alternative_database, schema)
-
-    @property
-    def project_config(self):
-        return {
-            'config-version': 2,
-        }
-
-    @property
-    def profile_config(self):
-        return {}
-
-    def run_dbt(self, args=None, expect_pass=True, profiles_dir=True):
-        res, success = self.run_dbt_and_check(args=args, profiles_dir=profiles_dir)
-        self.assertEqual(
-            success, expect_pass,
-            "dbt exit state did not match expected")
-
-        return res
-
-
-    def run_dbt_and_capture(self, *args, **kwargs):
-        try:
-            stringbuf = capture_stdout_logs()
-            res = self.run_dbt(*args, **kwargs)
-            stdout = stringbuf.getvalue()
-
-        finally:
-            stop_capture_stdout_logs()
-
-        return res, stdout
-
-    def run_dbt_and_check(self, args=None, profiles_dir=True):
-        log_manager.reset_handlers()
-        if args is None:
-            args = ["run"]
-
-        final_args = []
-
-        if os.getenv('DBT_TEST_SINGLE_THREADED') in ('y', 'Y', '1'):
-            final_args.append('--single-threaded')
-
-        final_args.extend(args)
-
-        if profiles_dir:
-            final_args.extend(['--profiles-dir', self.test_root_dir])
-        final_args.append('--log-cache-events')
-
-        logger.info("Invoking dbt with {}".format(final_args))
-        return dbt.handle_and_check(final_args)
-
-    def run_sql_file(self, path, kwargs=None):
-        with open(path, 'r') as f:
-            statements = f.read().split(";")
-            for statement in statements:
-                self.run_sql(statement, kwargs=kwargs)
-
-    def transform_sql(self, query, kwargs=None):
-        to_return = query
-
-        base_kwargs = {
-            'schema': self.unique_schema(),
-            'database': self.adapter.quote(self.default_database),
-        }
-        if kwargs is None:
-            kwargs = {}
-        base_kwargs.update(kwargs)
-
-        to_return = to_return.format(**base_kwargs)
-
-        return to_return
-
-    def run_sql(self, query, fetch='None', kwargs=None, connection_name=None):
-        if connection_name is None:
-            connection_name = '__test'
-
-        if query.strip() == "":
-            return
-
-        sql = self.transform_sql(query, kwargs=kwargs)
-
-        with self.get_connection(connection_name) as conn:
-            cursor = conn.handle.cursor()
-            try:
-                cursor.execute(sql)
-                if fetch == 'one':
-                    return cursor.fetchall()[0]
-                elif fetch == 'all':
-                    return cursor.fetchall()
-                else:
-                    # we have to fetch.
-                    cursor.fetchall()
-            except pyodbc.ProgrammingError as e:
-                # hacks for dropping schema
-                if "No results.  Previous SQL was not a query." not in str(e):
-                    raise e
-            except Exception as e:
-                conn.handle.rollback()
-                conn.transaction_open = False
-                print(sql)
-                print(e)
-                raise
-            else:
-                conn.transaction_open = False
-
-    def _ilike(self, target, value):
-        return "{} ilike '{}'".format(target, value)
-
-    def get_many_table_columns_bigquery(self, tables, schema, database=None):
-        result = []
-        for table in tables:
-            relation = self._make_relation(table, schema, database)
-            columns = self.adapter.get_columns_in_relation(relation)
-            for col in columns:
-                result.append((table, col.column, col.dtype, col.char_size))
-        return result
-
-    def get_many_table_columns(self, tables, schema, database=None):
-        result = self.get_many_table_columns_bigquery(tables, schema, database)
-        result.sort(key=lambda x: '{}.{}'.format(x[0], x[1]))
-        return result
-
-    def filter_many_columns(self, column):
-        if len(column) == 3:
-            table_name, column_name, data_type = column
-            char_size = None
-        else:
-            table_name, column_name, data_type, char_size = column
-        return (table_name, column_name, data_type, char_size)
-
-    @contextmanager
-    def get_connection(self, name=None):
-        """Create a test connection context where all executed macros, etc will
-        get self.adapter as the adapter.
-
-        This allows tests to run normal adapter macros as if reset_adapters()
-        were not called by handle_and_check (for asserts, etc)
-        """
-        if name is None:
-            name = '__test'
-        with patch.object(providers, 'get_adapter', return_value=self.adapter):
-            with self.adapter.connection_named(name):
-                conn = self.adapter.connections.get_thread_connection()
-                yield conn
-
-    def get_relation_columns(self, relation):
-        with self.get_connection():
-            columns = self.adapter.get_columns_in_relation(relation)
-
-        return sorted(((c.name, c.dtype, c.char_size) for c in columns),
-                      key=lambda x: x[0])
-
-    def get_table_columns(self, table, schema=None, database=None):
-        schema = self.unique_schema() if schema is None else schema
-        database = self.default_database if database is None else database
-        relation = self.adapter.Relation.create(
-            database=database,
-            schema=schema,
-            identifier=table,
-            type='table',
-            quote_policy=self.config.quoting
-        )
-        return self.get_relation_columns(relation)
-
-    def get_table_columns_as_dict(self, tables, schema=None):
-        col_matrix = self.get_many_table_columns(tables, schema)
-        res = {}
-        for row in col_matrix:
-            table_name = row[0]
-            col_def = row[1:]
-            if table_name not in res:
-                res[table_name] = []
-            res[table_name].append(col_def)
-        return res
-
-    def get_models_in_schema(self, schema=None):
-        schema = self.unique_schema() if schema is None else schema
-        sql = """
-                select table_name,
-                        case when table_type = 'BASE TABLE' then 'table'
-                             when table_type = 'VIEW' then 'view'
-                             else table_type
-                        end as materialization
-                from information_schema.tables
-                where {}
-                order by table_name
-                """
-
-        sql = sql.format(self._ilike('table_schema', schema))
-        result = self.run_sql(sql, fetch='all')
-
-        return {model_name: materialization for (model_name, materialization) in result}
-
-    def _assertTablesEqualSql(self, relation_a, relation_b, columns=None):
-        if columns is None:
-            columns = self.get_relation_columns(relation_a)
-        column_names = [c[0] for c in columns]
-
-        sql = self.adapter.get_rows_different_sql(
-            relation_a, relation_b, column_names
-        )
-
-        return sql
-
-    def assertTablesEqual(self, table_a, table_b,
-                          table_a_schema=None, table_b_schema=None,
-                          table_a_db=None, table_b_db=None):
-        if table_a_schema is None:
-            table_a_schema = self.unique_schema()
-
-        if table_b_schema is None:
-            table_b_schema = self.unique_schema()
-
-        if table_a_db is None:
-            table_a_db = self.default_database
-
-        if table_b_db is None:
-            table_b_db = self.default_database
-
-        relation_a = self._make_relation(table_a, table_a_schema, table_a_db)
-        relation_b = self._make_relation(table_b, table_b_schema, table_b_db)
-
-        self._assertTableColumnsEqual(relation_a, relation_b)
-
-        sql = self._assertTablesEqualSql(relation_a, relation_b)
-        result = self.run_sql(sql, fetch='one')
-
-        self.assertEqual(
-            result[0],
-            0,
-            'row_count_difference nonzero: ' + sql
-        )
-        self.assertEqual(
-            result[1],
-            0,
-            'num_mismatched nonzero: ' + sql
-        )
-
-    def _make_relation(self, identifier, schema=None, database=None):
-        if schema is None:
-            schema = self.unique_schema()
-        if database is None:
-            database = self.default_database
-        return self.adapter.Relation.create(
-            database=database,
-            schema=schema,
-            identifier=identifier,
-            quote_policy=self.config.quoting
-        )
-
-    def get_many_relation_columns(self, relations):
-        """Returns a dict of (database, schema) -> (dict of (table_name -> list of columns))
-        """
-        schema_fqns = {}
-        for rel in relations:
-            this_schema = schema_fqns.setdefault((rel.database, rel.schema), [])
-            this_schema.append(rel.identifier)
-
-        column_specs = {}
-        for key, tables in schema_fqns.items():
-            database, schema = key
-            columns = self.get_many_table_columns(tables, schema, database=database)
-            table_columns = {}
-            for col in columns:
-                table_columns.setdefault(col[0], []).append(col[1:])
-            for rel_name, columns in table_columns.items():
-                key = (database, schema, rel_name)
-                column_specs[key] = columns
-
-        return column_specs
-
-    def assertManyRelationsEqual(self, relations, default_schema=None, default_database=None):
-        if default_schema is None:
-            default_schema = self.unique_schema()
-        if default_database is None:
-            default_database = self.default_database
-
-        specs = []
-        for relation in relations:
-            if not isinstance(relation, (tuple, list)):
-                relation = [relation]
-
-            assert len(relation) <= 3
-
-            if len(relation) == 3:
-                relation = self._make_relation(*relation)
-            elif len(relation) == 2:
-                relation = self._make_relation(relation[0], relation[1], default_database)
-            elif len(relation) == 1:
-                relation = self._make_relation(relation[0], default_schema, default_database)
-            else:
-                raise ValueError('relation must be a sequence of 1, 2, or 3 values')
-
-            specs.append(relation)
-
-        with self.get_connection():
-            column_specs = self.get_many_relation_columns(specs)
-
-        # make sure everyone has equal column definitions
-        first_columns = None
-        for relation in specs:
-            key = (relation.database, relation.schema, relation.identifier)
-            # get a good error here instead of a hard-to-diagnose KeyError
-            self.assertIn(key, column_specs, f'No columns found for {key}')
-            columns = column_specs[key]
-            if first_columns is None:
-                first_columns = columns
-            else:
-                self.assertEqual(
-                    first_columns, columns,
-                    '{} did not match {}'.format(str(specs[0]), str(relation))
-                )
-
-        # make sure everyone has the same data. If we got here, everyone had
-        # the same column specs!
-        first_relation = None
-        for relation in specs:
-            if first_relation is None:
-                first_relation = relation
-            else:
-                sql = self._assertTablesEqualSql(first_relation, relation,
-                                                 columns=first_columns)
-                result = self.run_sql(sql, fetch='one')
-
-                self.assertEqual(
-                    result[0],
-                    0,
-                    'row_count_difference nonzero: ' + sql
-                )
-                self.assertEqual(
-                    result[1],
-                    0,
-                    'num_mismatched nonzero: ' + sql
-                )
-
-    def assertManyTablesEqual(self, *args):
-        schema = self.unique_schema()
-
-        all_tables = []
-        for table_equivalencies in args:
-            all_tables += list(table_equivalencies)
-
-        all_cols = self.get_table_columns_as_dict(all_tables, schema)
-
-        for table_equivalencies in args:
-            first_table = table_equivalencies[0]
-            first_relation = self._make_relation(first_table)
-
-            # assert that all tables have the same columns
-            base_result = all_cols[first_table]
-            self.assertTrue(len(base_result) > 0)
-
-            for other_table in table_equivalencies[1:]:
-                other_result = all_cols[other_table]
-                self.assertTrue(len(other_result) > 0)
-                self.assertEqual(base_result, other_result)
-
-                other_relation = self._make_relation(other_table)
-                sql = self._assertTablesEqualSql(first_relation,
-                                                 other_relation,
-                                                 columns=base_result)
-                result = self.run_sql(sql, fetch='one')
-
-                self.assertEqual(
-                    result[0],
-                    0,
-                    'row_count_difference nonzero: ' + sql
-                )
-                self.assertEqual(
-                    result[1],
-                    0,
-                    'num_mismatched nonzero: ' + sql
-                )
-
-
-    def _assertTableRowCountsEqual(self, relation_a, relation_b):
-        cmp_query = """
-            with table_a as (
-
-                select count(*) as num_rows from {}
-
-            ), table_b as (
-
-                select count(*) as num_rows from {}
-
-            )
-
-            select table_a.num_rows - table_b.num_rows as difference
-            from table_a, table_b
-
-        """.format(str(relation_a), str(relation_b))
-
-        res = self.run_sql(cmp_query, fetch='one')
-
-        self.assertEqual(int(res[0]), 0, "Row count of table {} doesn't match row count of table {}. ({} rows different)".format(
-                relation_a.identifier,
-                relation_b.identifier,
-                res[0]
-            )
-        )
-
-    def assertTableDoesNotExist(self, table, schema=None, database=None):
-        columns = self.get_table_columns(table, schema, database)
-
-        self.assertEqual(
-            len(columns),
-            0
-        )
-
-    def assertTableDoesExist(self, table, schema=None, database=None):
-        columns = self.get_table_columns(table, schema, database)
-
-        self.assertGreater(
-            len(columns),
-            0
-        )
-
-    def _assertTableColumnsEqual(self, relation_a, relation_b):
-        table_a_result = self.get_relation_columns(relation_a)
-        table_b_result = self.get_relation_columns(relation_b)
-
-        text_types = {'text', 'character varying', 'character', 'varchar'}
-
-        self.assertEqual(len(table_a_result), len(table_b_result))
-        for a_column, b_column in zip(table_a_result, table_b_result):
-            a_name, a_type, a_size = a_column
-            b_name, b_type, b_size = b_column
-            self.assertEqual(a_name, b_name,
-                '{} vs {}: column "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, b_name
-                ))
-
-            self.assertEqual(a_type, b_type,
-                '{} vs {}: column "{}" has type "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, a_type, b_type
-                ))
-
-            self.assertEqual(a_size, b_size,
-                '{} vs {}: column "{}" has size "{}" != "{}"'.format(
-                    relation_a, relation_b, a_name, a_size, b_size
-                ))
-
-    def assertEquals(self, *args, **kwargs):
-        # assertEquals is deprecated. This makes the warnings less chatty
-        self.assertEqual(*args, **kwargs)
-
-    def assertBetween(self, timestr, start, end=None):
-        datefmt = '%Y-%m-%dT%H:%M:%S.%fZ'
-        if end is None:
-            end = datetime.utcnow()
-
-        parsed = datetime.strptime(timestr, datefmt)
-
-        self.assertLessEqual(start, parsed,
-            'parsed date {} happened before {}'.format(
-                parsed,
-                start.strftime(datefmt))
-        )
-        self.assertGreaterEqual(end, parsed,
-            'parsed date {} happened after {}'.format(
-                parsed,
-                end.strftime(datefmt))
-        )
-
-
-def use_profile(profile_name):
-    """A decorator to declare a test method as using a particular profile.
-    Handles setting the pytest profile marker and checking that the test name matches the profile.
-
-    Use like this:
-
-    class TestSomething(DBTIntegrationTest):
-        @use_profile('postgres')
-        def test_postgres_thing(self):
-            self.assertEqual(self.adapter_type, 'postgres')
-
-        @use_profile('snowflake')
-        def test_snowflake_thing(self):
-            self.assertEqual(self.adapter_type, 'snowflake')
-    """
-    def outer(wrapped):
-        @getattr(pytest.mark, 'profile_'+profile_name)
-        @wraps(wrapped)
-        def func(self, *args, **kwargs):
-            return wrapped(self, *args, **kwargs)
-        # sanity check at import time
-        assert _profile_from_test_name(wrapped.__name__) == profile_name
-        return func
-    return outer
-
-
-class AnyFloat:
-    """Matches any float. Use this in assertEqual() calls to assert that the compared value is a float.
-    """
-    def __eq__(self, other):
-        return isinstance(other, float)
-
-
-class AnyString:
-    """Matches any string. Use this in assertEqual() calls to assert that the compared value is a string.
-    """
-    def __eq__(self, other):
-        return isinstance(other, str)
-
-
-class AnyStringWith:
-    def __init__(self, contains=None):
-        self.contains = contains
-
-    def __eq__(self, other):
-        if not isinstance(other, str):
-            return False
-
-        if self.contains is None:
-            return True
-
-        return self.contains in other
-
-    def __repr__(self):
-        return 'AnyStringWith<{!r}>'.format(self.contains)
-
-
-def get_manifest():
-    path = './target/partial_parse.msgpack'
-    if os.path.exists(path):
-        with open(path, 'rb') as fp:
-            manifest_mp = fp.read()
-        manifest: Manifest = Manifest.from_msgpack(manifest_mp)
-        return manifest
-    else:
-        return None
diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py
deleted file mode 100644
index b76bc6c31..000000000
--- a/tests/integration/conftest.py
+++ /dev/null
@@ -1,4 +0,0 @@
-def pytest_configure(config):
-    config.addinivalue_line("markers", "profile_databricks_cluster")
-    config.addinivalue_line("markers", "profile_databricks_sql_endpoint")
-    config.addinivalue_line("markers", "profile_apache_spark")
diff --git a/tests/integration/get_columns_in_relation/models/child.sql b/tests/integration/get_columns_in_relation/models/child.sql
deleted file mode 100644
index 2e3761f7a..000000000
--- a/tests/integration/get_columns_in_relation/models/child.sql
+++ /dev/null
@@ -1 +0,0 @@
-SELECT 1
diff --git a/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql b/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql
deleted file mode 100644
index 5118ae034..000000000
--- a/tests/integration/get_columns_in_relation/models/get_columns_from_child.sql
+++ /dev/null
@@ -1,6 +0,0 @@
-SELECT 
-  {% set cols = adapter.get_columns_in_relation(ref('child')) %}
-  {% for col in cols %}
-    {{ adapter.quote(col.column) }}{%- if not loop.last %},{{ '\n ' }}{% endif %}
-  {% endfor %}
-FROM {{ ref('child') }}
diff --git a/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py b/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py
deleted file mode 100644
index 418cbd99c..000000000
--- a/tests/integration/get_columns_in_relation/test_get_columns_in_relation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestGetColumnInRelationInSameRun(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "get_columns_in_relation"
-
-    @property
-    def models(self):
-        return "models"
-
-    def run_and_test(self):
-        self.run_dbt(["run"])
-        self.assertTablesEqual("child", "get_columns_from_child")
-
-    @use_profile("apache_spark")
-    def test_get_columns_in_relation_in_same_run_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_get_columns_in_relation_in_same_run_databricks_cluster(self):
-        self.run_and_test()
-
-    @use_profile("databricks_sql_endpoint")
-    def test_get_columns_in_relation_in_same_run_databricks_sql_endpoint(self):
-        self.run_and_test()
diff --git a/tests/integration/incremental_strategies/models/default_append.sql b/tests/integration/incremental_strategies/models/default_append.sql
deleted file mode 100644
index e2a10393f..000000000
--- a/tests/integration/incremental_strategies/models/default_append.sql
+++ /dev/null
@@ -1,17 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_file_format.sql b/tests/integration/incremental_strategies/models_bad/bad_file_format.sql
deleted file mode 100644
index 911ccbb88..000000000
--- a/tests/integration/incremental_strategies/models_bad/bad_file_format.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    file_format = 'something_else',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql
deleted file mode 100644
index b7186e1b2..000000000
--- a/tests/integration/incremental_strategies/models_bad/bad_insert_overwrite_delta.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql b/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql
deleted file mode 100644
index 79a951110..000000000
--- a/tests/integration/incremental_strategies/models_bad/bad_merge_not_delta.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_bad/bad_strategy.sql b/tests/integration/incremental_strategies/models_bad/bad_strategy.sql
deleted file mode 100644
index 72912f505..000000000
--- a/tests/integration/incremental_strategies/models_bad/bad_strategy.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'something_else',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/append_delta.sql b/tests/integration/incremental_strategies/models_delta/append_delta.sql
deleted file mode 100644
index bfbd2512c..000000000
--- a/tests/integration/incremental_strategies/models_delta/append_delta.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'append',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_no_key.sql b/tests/integration/incremental_strategies/models_delta/merge_no_key.sql
deleted file mode 100644
index 35a71b1a0..000000000
--- a/tests/integration/incremental_strategies/models_delta/merge_no_key.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql b/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql
deleted file mode 100644
index e8dfd37b9..000000000
--- a/tests/integration/incremental_strategies/models_delta/merge_unique_key.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-    unique_key = 'id',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql b/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql
deleted file mode 100644
index d934b2997..000000000
--- a/tests/integration/incremental_strategies/models_delta/merge_update_columns.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'delta',
-    unique_key = 'id',
-    merge_update_columns = ['msg'],
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
-
-{% else %}
-
--- msg will be updated, color will be ignored
-select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
-union all
-select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/append.sql b/tests/integration/incremental_strategies/models_hudi/append.sql
deleted file mode 100644
index 9be27bec3..000000000
--- a/tests/integration/incremental_strategies/models_hudi/append.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'append',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
deleted file mode 100644
index 081374089..000000000
--- a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_no_partitions.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
deleted file mode 100644
index 0f74cfdb3..000000000
--- a/tests/integration/incremental_strategies/models_hudi/insert_overwrite_partitions.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    partition_by = 'id',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
deleted file mode 100644
index 8def11ddf..000000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_no_key.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql b/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
deleted file mode 100644
index ee72860d2..000000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_unique_key.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-    unique_key = 'id',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql b/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
deleted file mode 100644
index 99f0d0b73..000000000
--- a/tests/integration/incremental_strategies/models_hudi/merge_update_columns.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'merge',
-    file_format = 'hudi',
-    unique_key = 'id',
-    merge_update_columns = ['msg'],
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg, 'blue' as color
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg, 'red' as color
-
-{% else %}
-
--- msg will be updated, color will be ignored
-select cast(2 as bigint) as id, 'yo' as msg, 'green' as color
-union all
-select cast(3 as bigint) as id, 'anyway' as msg, 'purple' as color
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
deleted file mode 100644
index ae007b45f..000000000
--- a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_no_partitions.sql
+++ /dev/null
@@ -1,19 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    file_format = 'parquet',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql b/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql
deleted file mode 100644
index cfe235ad2..000000000
--- a/tests/integration/incremental_strategies/models_insert_overwrite/insert_overwrite_partitions.sql
+++ /dev/null
@@ -1,20 +0,0 @@
-{{ config(
-    materialized = 'incremental',
-    incremental_strategy = 'insert_overwrite',
-    partition_by = 'id',
-    file_format = 'parquet',
-) }}
-
-{% if not is_incremental() %}
-
-select cast(1 as bigint) as id, 'hello' as msg
-union all
-select cast(2 as bigint) as id, 'goodbye' as msg
-
-{% else %}
-
-select cast(2 as bigint) as id, 'yo' as msg
-union all
-select cast(3 as bigint) as id, 'anyway' as msg
-
-{% endif %}
diff --git a/tests/integration/incremental_strategies/seeds/expected_append.csv b/tests/integration/incremental_strategies/seeds/expected_append.csv
deleted file mode 100644
index c96e569bd..000000000
--- a/tests/integration/incremental_strategies/seeds/expected_append.csv
+++ /dev/null
@@ -1,5 +0,0 @@
-id,msg
-1,hello
-2,goodbye
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_overwrite.csv b/tests/integration/incremental_strategies/seeds/expected_overwrite.csv
deleted file mode 100644
index 46d8f6050..000000000
--- a/tests/integration/incremental_strategies/seeds/expected_overwrite.csv
+++ /dev/null
@@ -1,3 +0,0 @@
-id,msg
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
deleted file mode 100644
index bc922cdec..000000000
--- a/tests/integration/incremental_strategies/seeds/expected_partial_upsert.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,msg,color
-1,hello,blue
-2,yo,red
-3,anyway,purple
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/seeds/expected_upsert.csv b/tests/integration/incremental_strategies/seeds/expected_upsert.csv
deleted file mode 100644
index 71805dfc5..000000000
--- a/tests/integration/incremental_strategies/seeds/expected_upsert.csv
+++ /dev/null
@@ -1,4 +0,0 @@
-id,msg
-1,hello
-2,yo
-3,anyway
\ No newline at end of file
diff --git a/tests/integration/incremental_strategies/test_incremental_strategies.py b/tests/integration/incremental_strategies/test_incremental_strategies.py
deleted file mode 100644
index 026f562db..000000000
--- a/tests/integration/incremental_strategies/test_incremental_strategies.py
+++ /dev/null
@@ -1,128 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestIncrementalStrategies(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "incremental_strategies"
-
-    @property
-    def project_config(self):
-        return {
-            'seeds': {
-                'quote_columns': False,
-            },
-        }
-
-    def seed_and_run_once(self):
-        self.run_dbt(["seed"])
-        self.run_dbt(["run"])
-
-    def seed_and_run_twice(self):
-        self.run_dbt(["seed"])
-        self.run_dbt(["run"])
-        self.run_dbt(["run"])
-
-
-class TestDefaultAppend(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models"
-        
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual("default_append", "expected_append")
-
-    @use_profile("apache_spark")
-    def test_default_append_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_default_append_databricks_cluster(self):
-        self.run_and_test()
-
-
-class TestInsertOverwrite(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_insert_overwrite"
-
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual(
-            "insert_overwrite_no_partitions", "expected_overwrite")
-        self.assertTablesEqual(
-            "insert_overwrite_partitions", "expected_upsert")
-
-    @use_profile("apache_spark")
-    def test_insert_overwrite_apache_spark(self):
-        self.run_and_test()
-
-    # This test requires specific settings on the test cluster;
-    # more info at https://docs.getdbt.com/reference/resource-configs/spark-configs#the-insert_overwrite-strategy
-    @use_profile("databricks_cluster")
-    def test_insert_overwrite_databricks_cluster(self):
-        self.run_and_test()
-
-
-class TestDeltaStrategies(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_delta"
-
-    def run_and_test(self):
-        self.seed_and_run_twice()
-        self.assertTablesEqual("append_delta", "expected_append")
-        self.assertTablesEqual("merge_no_key", "expected_append")
-        self.assertTablesEqual("merge_unique_key", "expected_upsert")
-        self.assertTablesEqual("merge_update_columns", "expected_partial_upsert")
-
-    @use_profile("databricks_cluster")
-    def test_delta_strategies_databricks_cluster(self):
-        self.run_and_test()
-
-# Uncomment this Hudi integration test after the Hudi 0.10.0 release; it will not work until then.
-# class TestHudiStrategies(TestIncrementalStrategies):
-#     @property
-#     def models(self):
-#         return "models_hudi"
-#
-#     def run_and_test(self):
-#         self.seed_and_run_once()
-#         self.assertTablesEqual("append", "expected_append")
-#         self.assertTablesEqual("merge_no_key", "expected_append")
-#         self.assertTablesEqual("merge_unique_key", "expected_upsert")
-#         self.assertTablesEqual(
-#             "insert_overwrite_no_partitions", "expected_overwrite")
-#         self.assertTablesEqual(
-#             "insert_overwrite_partitions", "expected_upsert")
-#
-#     @use_profile("apache_spark")
-#     def test_hudi_strategies_apache_spark(self):
-#         self.run_and_test()
-
-
-class TestBadStrategies(TestIncrementalStrategies):
-    @property
-    def models(self):
-        return "models_bad"
-
-    def run_and_test(self):
-        results = self.run_dbt(["run"], expect_pass=False)
-        # assert all models fail with compilation errors
-        for result in results:
-            self.assertEqual("error", result.status)
-            self.assertIn("Compilation Error in model", result.message)
-
-    @use_profile("apache_spark")
-    def test_bad_strategies_apache_spark(self):
-        self.run_and_test()
-
-    @use_profile("databricks_cluster")
-    def test_bad_strategies_databricks_cluster(self):
-        self.run_and_test()
-
-    @use_profile("databricks_sql_endpoint")
-    def test_bad_strategies_databricks_sql_endpoint(self):
-        self.run_and_test()
diff --git a/tests/integration/seed_column_types/seeds/payments.csv b/tests/integration/seed_column_types/seeds/payments.csv
deleted file mode 100644
index 3f49d788c..000000000
--- a/tests/integration/seed_column_types/seeds/payments.csv
+++ /dev/null
@@ -1,11 +0,0 @@
-ID,ORDERID,PAYMENTMETHOD,STATUS,AMOUNT,AMOUNT_USD,CREATED
-1,1,credit_card,success,1000,10.00,2018-01-01
-2,2,credit_card,success,2000,20.00,2018-01-02
-3,3,coupon,success,100,1.00,2018-01-04
-4,4,coupon,success,2500,25.00,2018-01-05
-5,5,bank_transfer,fail,1700,17.00,2018-01-05
-6,5,bank_transfer,success,1700,17.00,2018-01-05
-7,6,credit_card,success,600,6.00,2018-01-07
-8,7,credit_card,success,1600,16.00,2018-01-09
-9,8,credit_card,success,2300,23.00,2018-01-11
-10,9,gift_card,success,2300,23.00,2018-01-12
diff --git a/tests/integration/seed_column_types/test_seed_column_types.py b/tests/integration/seed_column_types/test_seed_column_types.py
deleted file mode 100644
index dc997d110..000000000
--- a/tests/integration/seed_column_types/test_seed_column_types.py
+++ /dev/null
@@ -1,35 +0,0 @@
-from cProfile import run
-from tests.integration.base import DBTIntegrationTest, use_profile
-
-
-class TestSeedColumnTypeCast(DBTIntegrationTest):
-    @property
-    def schema(self):
-        return "seed_column_types"
-        
-    @property
-    def models(self):
-        return "models"
-
-    @property
-    def project_config(self):
-        return {
-            'seeds': {
-                'quote_columns': False,
-            },
-        }
-
-    # runs on Spark v2.0
-    @use_profile("apache_spark")
-    def test_seed_column_types_apache_spark(self):
-        self.run_dbt(["seed"])
-
-    # runs on Spark v3.0
-    @use_profile("databricks_cluster")
-    def test_seed_column_types_databricks_cluster(self):
-        self.run_dbt(["seed"])
-
-    # runs on Spark v3.0
-    @use_profile("databricks_sql_endpoint")
-    def test_seed_column_types_databricks_sql_endpoint(self):
-        self.run_dbt(["seed"])
diff --git a/tests/unit/test_adapter.py b/tests/unit/test_adapter.py
index 2e63229f3..1107543ec 100644
--- a/tests/unit/test_adapter.py
+++ b/tests/unit/test_adapter.py
@@ -10,166 +10,186 @@
 
 
 class TestSparkAdapter(unittest.TestCase):
-
     def setUp(self):
         flags.STRICT_MODE = False
 
         self.project_cfg = {
-            'name': 'X',
-            'version': '0.1',
-            'profile': 'test',
-            'project-root': '/tmp/dbt/does-not-exist',
-            'quoting': {
-                'identifier': False,
-                'schema': False,
+            "name": "X",
+            "version": "0.1",
+            "profile": "test",
+            "project-root": "/tmp/dbt/does-not-exist",
+            "quoting": {
+                "identifier": False,
+                "schema": False,
             },
-            'config-version': 2
+            "config-version": 2,
         }
 
     def _get_target_http(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'http',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "http",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "organization": "0123456789",
+                        "cluster": "01234-23423-coffeetime",
+                        "server_side_parameters": {"spark.driver.memory": "4g"},
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_thrift(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_thrift_kerberos(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt',
-                    'auth': 'KERBEROS',
-                    'kerberos_service_name': 'hive'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                        "auth": "KERBEROS",
+                        "kerberos_service_name": "hive",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_use_ssl_thrift(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'thrift',
-                    'use_ssl': True,
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 10001,
-                    'user': 'dbt'
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "thrift",
+                        "use_ssl": True,
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 10001,
+                        "user": "dbt",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_odbc_cluster(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                    'driver': 'Simba',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "odbc",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "organization": "0123456789",
+                        "cluster": "01234-23423-coffeetime",
+                        "driver": "Simba",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def _get_target_odbc_sql_endpoint(self, project):
-        return config_from_parts_or_dicts(project, {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'endpoint': '012342342393920a',
-                    'driver': 'Simba',
-                }
+        return config_from_parts_or_dicts(
+            project,
+            {
+                "outputs": {
+                    "test": {
+                        "type": "spark",
+                        "method": "odbc",
+                        "schema": "analytics",
+                        "host": "myorg.sparkhost.com",
+                        "port": 443,
+                        "token": "abc123",
+                        "endpoint": "012342342393920a",
+                        "driver": "Simba",
+                    }
+                },
+                "target": "test",
             },
-            'target': 'test'
-        })
+        )
 
     def test_http_connection(self):
         config = self._get_target_http(self.project_cfg)
         adapter = SparkAdapter(config)
 
-        def hive_http_connect(thrift_transport):
-            self.assertEqual(thrift_transport.scheme, 'https')
+        def hive_http_connect(thrift_transport, configuration):
+            self.assertEqual(thrift_transport.scheme, "https")
             self.assertEqual(thrift_transport.port, 443)
-            self.assertEqual(thrift_transport.host, 'myorg.sparkhost.com')
+            self.assertEqual(thrift_transport.host, "myorg.sparkhost.com")
             self.assertEqual(
-                thrift_transport.path, '/sql/protocolv1/o/0123456789/01234-23423-coffeetime')
+                thrift_transport.path, "/sql/protocolv1/o/0123456789/01234-23423-coffeetime"
+            )
+            self.assertEqual(configuration["spark.driver.memory"], "4g")
 
         # with mock.patch.object(hive, 'connect', new=hive_http_connect):
-        with mock.patch('dbt.adapters.spark.connections.hive.connect', new=hive_http_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch("dbt.adapters.spark.connections.hive.connect", new=hive_http_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.cluster,
-                             '01234-23423-coffeetime')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.cluster, "01234-23423-coffeetime")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection(self):
         config = self._get_target_thrift(self.project_cfg)
         adapter = SparkAdapter(config)
 
+
         def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password, configuration):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
-            self.assertEqual(username, 'dbt')
+            self.assertEqual(username, "dbt")
             self.assertIsNone(auth)
             self.assertIsNone(kerberos_service_name)
             self.assertIsNone(password)
             self.assertDictEqual(configuration, {})
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_ssl_connection(self):
@@ -179,17 +199,17 @@ def test_thrift_ssl_connection(self):
         def hive_thrift_connect(thrift_transport, configuration):
             self.assertIsNotNone(thrift_transport)
             transport = thrift_transport._trans
-            self.assertEqual(transport.host, 'myorg.sparkhost.com')
+            self.assertEqual(transport.host, "myorg.sparkhost.com")
             self.assertEqual(transport.port, 10001)
             self.assertDictEqual(configuration, {})
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_thrift_connection_kerberos(self):
@@ -199,19 +219,19 @@ def test_thrift_connection_kerberos(self):
         def hive_thrift_connect(host, port, username, auth, kerberos_service_name, password, configuration):
             self.assertEqual(host, 'myorg.sparkhost.com')
             self.assertEqual(port, 10001)
-            self.assertEqual(username, 'dbt')
-            self.assertEqual(auth, 'KERBEROS')
-            self.assertEqual(kerberos_service_name, 'hive')
+            self.assertEqual(username, "dbt")
+            self.assertEqual(auth, "KERBEROS")
+            self.assertEqual(kerberos_service_name, "hive")
             self.assertIsNone(password)
             self.assertDictEqual(configuration, {})
 
-        with mock.patch.object(hive, 'connect', new=hive_thrift_connect):
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch.object(hive, "connect", new=hive_thrift_connect):
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_cluster_connection(self):
@@ -220,23 +240,25 @@ def test_odbc_cluster_connection(self):
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
-            self.assertIn('driver=simba;', connection_str.lower())
-            self.assertIn('port=443;', connection_str.lower())
-            self.assertIn('host=myorg.sparkhost.com;',
-                          connection_str.lower())
+            self.assertIn("driver=simba;", connection_str.lower())
+            self.assertIn("port=443;", connection_str.lower())
+            self.assertIn("host=myorg.sparkhost.com;", connection_str.lower())
             self.assertIn(
-                'httppath=/sql/protocolv1/o/0123456789/01234-23423-coffeetime;', connection_str.lower())  # noqa
-
-        with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect):  # noqa
-            connection = adapter.acquire_connection('dummy')
+                "httppath=/sql/protocolv1/o/0123456789/01234-23423-coffeetime;",
+                connection_str.lower(),
+            )  # noqa
+
+        with mock.patch(
+            "dbt.adapters.spark.connections.pyodbc.connect", new=pyodbc_connect
+        ):  # noqa
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.cluster,
-                             '01234-23423-coffeetime')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.cluster, "01234-23423-coffeetime")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_odbc_endpoint_connection(self):
@@ -245,23 +267,24 @@ def test_odbc_endpoint_connection(self):
 
         def pyodbc_connect(connection_str, autocommit):
             self.assertTrue(autocommit)
-            self.assertIn('driver=simba;', connection_str.lower())
-            self.assertIn('port=443;', connection_str.lower())
-            self.assertIn('host=myorg.sparkhost.com;',
-                          connection_str.lower())
+            self.assertIn("driver=simba;", connection_str.lower())
+            self.assertIn("port=443;", connection_str.lower())
+            self.assertIn("host=myorg.sparkhost.com;", connection_str.lower())
             self.assertIn(
-                'httppath=/sql/1.0/endpoints/012342342393920a;', connection_str.lower())  # noqa
+                "httppath=/sql/1.0/endpoints/012342342393920a;", connection_str.lower()
+            )  # noqa
 
-        with mock.patch('dbt.adapters.spark.connections.pyodbc.connect', new=pyodbc_connect):  # noqa
-            connection = adapter.acquire_connection('dummy')
+        with mock.patch(
+            "dbt.adapters.spark.connections.pyodbc.connect", new=pyodbc_connect
+        ):  # noqa
+            connection = adapter.acquire_connection("dummy")
             connection.handle  # trigger lazy-load
 
-            self.assertEqual(connection.state, 'open')
+            self.assertEqual(connection.state, "open")
             self.assertIsNotNone(connection.handle)
-            self.assertEqual(connection.credentials.endpoint,
-                             '012342342393920a')
-            self.assertEqual(connection.credentials.token, 'abc123')
-            self.assertEqual(connection.credentials.schema, 'analytics')
+            self.assertEqual(connection.credentials.endpoint, "012342342393920a")
+            self.assertEqual(connection.credentials.token, "abc123")
+            self.assertEqual(connection.credentials.schema, "analytics")
             self.assertIsNone(connection.credentials.database)
 
     def test_parse_relation(self):
@@ -269,235 +292,238 @@ def test_parse_relation(self):
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('col2', 'string',),
-            ('dt', 'date'),
-            ('struct_col', 'struct'),
-            ('# Partition Information', 'data_type'),
-            ('# col_name', 'data_type'),
-            ('dt', 'date'),
+            ("col1", "decimal(22,0)"),
+            (
+                "col2",
+                "string",
+            ),
+            ("dt", "date"),
+            ("struct_col", "struct"),
+            ("# Partition Information", "data_type"),
+            ("# col_name", "data_type"),
+            ("dt", "date"),
             (None, None),
-            ('# Detailed Table Information', None),
-            ('Database', None),
-            ('Owner', 'root'),
-            ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'),
-            ('Last Access', 'Wed May 20 19:25:00 UTC 1925'),
-            ('Type', 'MANAGED'),
-            ('Provider', 'delta'),
-            ('Location', '/mnt/vo'),
-            ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
-            ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'),
-            ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'),
-            ('Partition Provider', 'Catalog')
+            ("# Detailed Table Information", None),
+            ("Database", None),
+            ("Owner", "root"),
+            ("Created Time", "Wed Feb 04 18:15:00 UTC 1815"),
+            ("Last Access", "Wed May 20 19:25:00 UTC 1925"),
+            ("Type", "MANAGED"),
+            ("Provider", "delta"),
+            ("Location", "/mnt/vo"),
+            ("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
+            ("InputFormat", "org.apache.hadoop.mapred.SequenceFileInputFormat"),
+            ("OutputFormat", "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"),
+            ("Partition Provider", "Catalog"),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
         self.assertEqual(len(rows), 4)
-        self.assertEqual(rows[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            rows[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
-        self.assertEqual(rows[1].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col2',
-            'column_index': 1,
-            'dtype': 'string',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            rows[1].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col2",
+                "column_index": 1,
+                "dtype": "string",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
-        self.assertEqual(rows[2].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'dt',
-            'column_index': 2,
-            'dtype': 'date',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            rows[2].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "dt",
+                "column_index": 2,
+                "dtype": "date",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
-        self.assertEqual(rows[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            rows[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
     def test_parse_relation_with_integer_owner(self):
         self.maxDiff = None
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('# Detailed Table Information', None),
-            ('Owner', 1234)
+            ("col1", "decimal(22,0)"),
+            ("# Detailed Table Information", None),
+            ("Owner", 1234),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
 
-        self.assertEqual(rows[0].to_column_dict().get('table_owner'), '1234')
+        self.assertEqual(rows[0].to_column_dict().get("table_owner"), "1234")
 
     def test_parse_relation_with_statistics(self):
         self.maxDiff = None
         rel_type = SparkRelation.get_relation_type.Table
 
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type
+            schema="default_schema", identifier="mytable", type=rel_type
         )
         assert relation.database is None
 
         # Mimics the output of Spark with a DESCRIBE TABLE EXTENDED
         plain_rows = [
-            ('col1', 'decimal(22,0)'),
-            ('# Partition Information', 'data_type'),
+            ("col1", "decimal(22,0)"),
+            ("# Partition Information", "data_type"),
             (None, None),
-            ('# Detailed Table Information', None),
-            ('Database', None),
-            ('Owner', 'root'),
-            ('Created Time', 'Wed Feb 04 18:15:00 UTC 1815'),
-            ('Last Access', 'Wed May 20 19:25:00 UTC 1925'),
-            ('Statistics', '1109049927 bytes, 14093476 rows'),
-            ('Type', 'MANAGED'),
-            ('Provider', 'delta'),
-            ('Location', '/mnt/vo'),
-            ('Serde Library', 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'),
-            ('InputFormat', 'org.apache.hadoop.mapred.SequenceFileInputFormat'),
-            ('OutputFormat', 'org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat'),
-            ('Partition Provider', 'Catalog')
+            ("# Detailed Table Information", None),
+            ("Database", None),
+            ("Owner", "root"),
+            ("Created Time", "Wed Feb 04 18:15:00 UTC 1815"),
+            ("Last Access", "Wed May 20 19:25:00 UTC 1925"),
+            ("Statistics", "1109049927 bytes, 14093476 rows"),
+            ("Type", "MANAGED"),
+            ("Provider", "delta"),
+            ("Location", "/mnt/vo"),
+            ("Serde Library", "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"),
+            ("InputFormat", "org.apache.hadoop.mapred.SequenceFileInputFormat"),
+            ("OutputFormat", "org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat"),
+            ("Partition Provider", "Catalog"),
         ]
 
-        input_cols = [Row(keys=['col_name', 'data_type'], values=r)
-                      for r in plain_rows]
+        input_cols = [Row(keys=["col_name", "data_type"], values=r) for r in plain_rows]
 
         config = self._get_target_http(self.project_cfg)
-        rows = SparkAdapter(config).parse_describe_extended(
-            relation, input_cols)
+        rows = SparkAdapter(config).parse_describe_extended(relation, input_cols)
         self.assertEqual(len(rows), 1)
-        self.assertEqual(rows[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 1109049927,
-
-            'stats:rows:description': '',
-            'stats:rows:include': True,
-            'stats:rows:label': 'rows',
-            'stats:rows:value': 14093476,
-        })
+        self.assertEqual(
+            rows[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1109049927,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 14093476,
+            },
+        )
 
     def test_relation_with_database(self):
         config = self._get_target_http(self.project_cfg)
         adapter = SparkAdapter(config)
         # fine
-        adapter.Relation.create(schema='different', identifier='table')
+        adapter.Relation.create(schema="different", identifier="table")
         with self.assertRaises(DbtRuntimeError):
             # not fine - database set
-            adapter.Relation.create(
-                database='something', schema='different', identifier='table')
+            adapter.Relation.create(database="something", schema="different", identifier="table")
 
     def test_profile_with_database(self):
         profile = {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'http',
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "http",
                     # not allowed
-                    'database': 'analytics2',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
+                    "database": "analytics2",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
                 }
             },
-            'target': 'test'
+            "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
 
     def test_profile_with_cluster_and_sql_endpoint(self):
         profile = {
-            'outputs': {
-                'test': {
-                    'type': 'spark',
-                    'method': 'odbc',
-                    'schema': 'analytics',
-                    'host': 'myorg.sparkhost.com',
-                    'port': 443,
-                    'token': 'abc123',
-                    'organization': '0123456789',
-                    'cluster': '01234-23423-coffeetime',
-                    'endpoint': '0123412341234e',
+            "outputs": {
+                "test": {
+                    "type": "spark",
+                    "method": "odbc",
+                    "schema": "analytics",
+                    "host": "myorg.sparkhost.com",
+                    "port": 443,
+                    "token": "abc123",
+                    "organization": "0123456789",
+                    "cluster": "01234-23423-coffeetime",
+                    "endpoint": "0123412341234e",
                 }
             },
-            'target': 'test'
+            "target": "test",
         }
         with self.assertRaises(DbtRuntimeError):
             config_from_parts_or_dicts(self.project_cfg, profile)
@@ -531,53 +557,53 @@ def test_parse_columns_from_information_with_table_type_and_delta_provider(self)
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
-        self.assertEqual(columns[0].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col1',
-            'column_index': 0,
-            'dtype': 'decimal(22,0)',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
-
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 123456789,
-        })
-
-        self.assertEqual(columns[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None,
+        self.assertEqual(
+            columns[0].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col1",
+                "column_index": 0,
+                "dtype": "decimal(22,0)",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
+        )
 
-            'stats:bytes:description': '',
-            'stats:bytes:include': True,
-            'stats:bytes:label': 'bytes',
-            'stats:bytes:value': 123456789,
-        })
+        self.assertEqual(
+            columns[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
+        )
 
     def test_parse_columns_from_information_with_view_type(self):
         self.maxDiff = None
@@ -616,43 +642,45 @@ def test_parse_columns_from_information_with_view_type(self):
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='myview',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="myview", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
-        self.assertEqual(columns[1].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'col2',
-            'column_index': 1,
-            'dtype': 'string',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            columns[1].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "col2",
+                "column_index": 1,
+                "dtype": "string",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
-        self.assertEqual(columns[3].to_column_dict(omit_none=False), {
-            'table_database': None,
-            'table_schema': relation.schema,
-            'table_name': relation.name,
-            'table_type': rel_type,
-            'table_owner': 'root',
-            'column': 'struct_col',
-            'column_index': 3,
-            'dtype': 'struct',
-            'numeric_scale': None,
-            'numeric_precision': None,
-            'char_size': None
-        })
+        self.assertEqual(
+            columns[3].to_column_dict(omit_none=False),
+            {
+                "table_database": None,
+                "table_schema": relation.schema,
+                "table_name": relation.name,
+                "table_type": rel_type,
+                "table_owner": "root",
+                "column": "struct_col",
+                "column_index": 3,
+                "dtype": "struct",
+                "numeric_scale": None,
+                "numeric_precision": None,
+                "char_size": None,
+            },
+        )
 
     def test_parse_columns_from_information_with_table_type_and_parquet_provider(self):
         self.maxDiff = None
@@ -680,16 +708,13 @@ def test_parse_columns_from_information_with_table_type_and_parquet_provider(sel
             " |    |-- struct_inner_col: string (nullable = true)\n"
         )
         relation = SparkRelation.create(
-            schema='default_schema',
-            identifier='mytable',
-            type=rel_type,
-            information=information
+            schema="default_schema", identifier="mytable", type=rel_type, information=information
         )
 
         config = self._get_target_http(self.project_cfg)
-        columns = SparkAdapter(config).parse_columns_from_information(
-            relation)
+        columns = SparkAdapter(config).parse_columns_from_information(relation)
         self.assertEqual(len(columns), 4)
+
         self.assertEqual(columns[2].to_column_dict(omit_none=False), {
             'table_database': None,
             'table_schema': relation.schema,
diff --git a/tests/unit/test_column.py b/tests/unit/test_column.py
index f7f8d8776..0132ad88a 100644
--- a/tests/unit/test_column.py
+++ b/tests/unit/test_column.py
@@ -4,35 +4,31 @@
 
 
 class TestSparkColumn(unittest.TestCase):
-
     def test_convert_table_stats_with_no_statistics(self):
-        self.assertDictEqual(
-            SparkColumn.convert_table_stats(None),
-            {}
-        )
+        self.assertDictEqual(SparkColumn.convert_table_stats(None), {})
 
     def test_convert_table_stats_with_bytes(self):
         self.assertDictEqual(
             SparkColumn.convert_table_stats("123456789 bytes"),
             {
-                'stats:bytes:description': '',
-                'stats:bytes:include': True,
-                'stats:bytes:label': 'bytes',
-                'stats:bytes:value': 123456789
-            }
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 123456789,
+            },
         )
 
     def test_convert_table_stats_with_bytes_and_rows(self):
         self.assertDictEqual(
             SparkColumn.convert_table_stats("1234567890 bytes, 12345678 rows"),
             {
-                'stats:bytes:description': '',
-                'stats:bytes:include': True,
-                'stats:bytes:label': 'bytes',
-                'stats:bytes:value': 1234567890,
-                'stats:rows:description': '',
-                'stats:rows:include': True,
-                'stats:rows:label': 'rows',
-                'stats:rows:value': 12345678
-            }
+                "stats:bytes:description": "",
+                "stats:bytes:include": True,
+                "stats:bytes:label": "bytes",
+                "stats:bytes:value": 1234567890,
+                "stats:rows:description": "",
+                "stats:rows:include": True,
+                "stats:rows:label": "rows",
+                "stats:rows:value": 12345678,
+            },
         )
diff --git a/tests/unit/test_credentials.py b/tests/unit/test_credentials.py
new file mode 100644
index 000000000..7a81fdbb1
--- /dev/null
+++ b/tests/unit/test_credentials.py
@@ -0,0 +1,12 @@
+from dbt.adapters.spark.connections import SparkConnectionMethod, SparkCredentials
+
+
+def test_credentials_server_side_parameters_keys_and_values_are_strings() -> None:
+    credentials = SparkCredentials(
+        host="localhost",
+        method=SparkConnectionMethod.THRIFT,
+        database="tests",
+        schema="tests",
+        server_side_parameters={"spark.configuration": 10},
+    )
+    assert credentials.server_side_parameters["spark.configuration"] == "10"
diff --git a/tests/unit/test_macros.py b/tests/unit/test_macros.py
index 220a74db7..5b648a079 100644
--- a/tests/unit/test_macros.py
+++ b/tests/unit/test_macros.py
@@ -5,153 +5,206 @@
 
 
 class TestSparkMacros(unittest.TestCase):
-
     def setUp(self):
-        self.jinja_env = Environment(loader=FileSystemLoader('dbt/include/spark/macros'),
-                                     extensions=['jinja2.ext.do', ])
+        self.jinja_env = Environment(
+            loader=FileSystemLoader("dbt/include/spark/macros"),
+            extensions=[
+                "jinja2.ext.do",
+            ],
+        )
 
         self.config = {}
         self.default_context = {
-            'validation': mock.Mock(),
-            'model': mock.Mock(),
-            'exceptions': mock.Mock(),
-            'config': mock.Mock(),
-            'adapter': mock.Mock(),
-            'return': lambda r: r,
+            "validation": mock.Mock(),
+            "model": mock.Mock(),
+            "exceptions": mock.Mock(),
+            "config": mock.Mock(),
+            "adapter": mock.Mock(),
+            "return": lambda r: r,
         }
-        self.default_context['config'].get = lambda key, default=None, **kwargs: self.config.get(key, default)
+        self.default_context["config"].get = lambda key, default=None, **kwargs: self.config.get(
+            key, default
+        )
 
     def __get_template(self, template_filename):
         return self.jinja_env.get_template(template_filename, globals=self.default_context)
 
     def __run_macro(self, template, name, temporary, relation, sql):
-        self.default_context['model'].alias = relation
+        self.default_context["model"].alias = relation
 
         def dispatch(macro_name, macro_namespace=None, packages=None):
-            return getattr(template.module, f'spark__{macro_name}')
-        self.default_context['adapter'].dispatch = dispatch
+            return getattr(template.module, f"spark__{macro_name}")
+
+        self.default_context["adapter"].dispatch = dispatch
 
         value = getattr(template.module, name)(temporary, relation, sql)
-        return re.sub(r'\s\s+', ' ', value)
+        return re.sub(r"\s\s+", " ", value)
 
     def test_macros_load(self):
-        self.jinja_env.get_template('adapters.sql')
+        self.jinja_env.get_template("adapters.sql")
 
     def test_macros_create_table_as(self):
-        template = self.__get_template('adapters.sql')
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        template = self.__get_template("adapters.sql")
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
 
         self.assertEqual(sql, "create table my_table as select 1")
 
     def test_macros_create_table_as_file_format(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'delta'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "delta"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create or replace table my_table using delta as select 1")
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table using hudi as select 1")
 
     def test_macros_create_table_as_options(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'delta'
-        self.config['options'] = {"compression": "gzip"}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, 'create or replace table my_table using delta options (compression "gzip" ) as select 1')
+        self.config["file_format"] = "delta"
+        self.config["options"] = {"compression": "gzip"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql,
+            'create or replace table my_table using delta options (compression "gzip" ) as select 1',
+        )
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (compression "gzip" ) as select 1')
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (compression "gzip" ) as select 1'
+        )
 
     def test_macros_create_table_as_hudi_options(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'id'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "id"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id'
+        )
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'id'
-        self.config['options'] = {'primaryKey': 'id'}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id').strip()
-        self.assertEqual(sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id')
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "id"
+        self.config["options"] = {"primaryKey": "id"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        ).strip()
+        self.assertEqual(
+            sql, 'create table my_table using hudi options (primaryKey "id" ) as select 1 as id'
+        )
 
-        self.config['file_format'] = 'hudi'
-        self.config['unique_key'] = 'uuid'
-        self.config['options'] = {'primaryKey': 'id'}
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1 as id')
-        self.assertIn('mock.raise_compiler_error()', sql)
+        self.config["file_format"] = "hudi"
+        self.config["unique_key"] = "uuid"
+        self.config["options"] = {"primaryKey": "id"}
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1 as id"
+        )
+        self.assertIn("mock.raise_compiler_error()", sql)
 
     def test_macros_create_table_as_partition(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['partition_by'] = 'partition_1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["partition_by"] = "partition_1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table partitioned by (partition_1) as select 1")
 
     def test_macros_create_table_as_partitions(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['partition_by'] = ['partition_1', 'partition_2']
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql,
-                         "create table my_table partitioned by (partition_1,partition_2) as select 1")
+        self.config["partition_by"] = ["partition_1", "partition_2"]
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, "create table my_table partitioned by (partition_1,partition_2) as select 1"
+        )
 
     def test_macros_create_table_as_cluster(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['clustered_by'] = 'cluster_1'
-        self.config['buckets'] = '1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, "create table my_table clustered by (cluster_1) into 1 buckets as select 1")
+        self.config["clustered_by"] = "cluster_1"
+        self.config["buckets"] = "1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql, "create table my_table clustered by (cluster_1) into 1 buckets as select 1"
+        )
 
     def test_macros_create_table_as_clusters(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['clustered_by'] = ['cluster_1', 'cluster_2']
-        self.config['buckets'] = '1'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
-        self.assertEqual(sql, "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1")
+        self.config["clustered_by"] = ["cluster_1", "cluster_2"]
+        self.config["buckets"] = "1"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
+        self.assertEqual(
+            sql,
+            "create table my_table clustered by (cluster_1,cluster_2) into 1 buckets as select 1",
+        )
 
     def test_macros_create_table_as_location(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['location_root'] = '/mnt/root'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["location_root"] = "/mnt/root"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table location '/mnt/root/my_table' as select 1")
 
     def test_macros_create_table_as_comment(self):
-        template = self.__get_template('adapters.sql')
+        template = self.__get_template("adapters.sql")
 
-        self.config['persist_docs'] = {'relation': True}
-        self.default_context['model'].description = 'Description Test'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["persist_docs"] = {"relation": True}
+        self.default_context["model"].description = "Description Test"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(sql, "create table my_table comment 'Description Test' as select 1")
 
     def test_macros_create_table_as_all(self):
-        template = self.__get_template('adapters.sql')
-
-        self.config['file_format'] = 'delta'
-        self.config['location_root'] = '/mnt/root'
-        self.config['partition_by'] = ['partition_1', 'partition_2']
-        self.config['clustered_by'] = ['cluster_1', 'cluster_2']
-        self.config['buckets'] = '1'
-        self.config['persist_docs'] = {'relation': True}
-        self.default_context['model'].description = 'Description Test'
-
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        template = self.__get_template("adapters.sql")
+
+        self.config["file_format"] = "delta"
+        self.config["location_root"] = "/mnt/root"
+        self.config["partition_by"] = ["partition_1", "partition_2"]
+        self.config["clustered_by"] = ["cluster_1", "cluster_2"]
+        self.config["buckets"] = "1"
+        self.config["persist_docs"] = {"relation": True}
+        self.default_context["model"].description = "Description Test"
+
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(
             sql,
-            "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
+            "create or replace table my_table using delta partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1",
         )
 
-        self.config['file_format'] = 'hudi'
-        sql = self.__run_macro(template, 'spark__create_table_as', False, 'my_table', 'select 1').strip()
+        self.config["file_format"] = "hudi"
+        sql = self.__run_macro(
+            template, "spark__create_table_as", False, "my_table", "select 1"
+        ).strip()
         self.assertEqual(
             sql,
-            "create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1"
+            "create table my_table using hudi partitioned by (partition_1,partition_2) clustered by (cluster_1,cluster_2) into 1 buckets location '/mnt/root/my_table' comment 'Description Test' as select 1",
         )
diff --git a/tests/unit/utils.py b/tests/unit/utils.py
index 585f1f61d..ac8c62244 100644
--- a/tests/unit/utils.py
+++ b/tests/unit/utils.py
@@ -26,22 +26,22 @@ def normalize(path):
 
 
 class Obj:
-    which = 'blah'
+    which = "blah"
     single_threaded = False
 
 
-def mock_connection(name, state='open'):
+def mock_connection(name, state="open"):
     conn = mock.MagicMock()
     conn.name = name
     conn.state = state
     return conn
 
 
-def profile_from_dict(profile, profile_name, cli_vars='{}'):
+def profile_from_dict(profile, profile_name, cli_vars="{}"):
     from dbt.config import Profile
     from dbt.config.renderer import ProfileRenderer
-    from dbt.context.base import generate_base_context
     from dbt.config.utils import parse_cli_vars
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
@@ -51,6 +51,7 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
     # flags global. This is a bit of a hack, but it's the best way to do it.
     from dbt.flags import set_from_args
     from argparse import Namespace
+
     set_from_args(Namespace(), None)
     return Profile.from_raw_profile_info(
         profile,
@@ -59,16 +60,16 @@ def profile_from_dict(profile, profile_name, cli_vars='{}'):
     )
 
 
-def project_from_dict(project, profile, packages=None, selectors=None, cli_vars='{}'):
-    from dbt.config import Project
+def project_from_dict(project, profile, packages=None, selectors=None, cli_vars="{}"):
     from dbt.config.renderer import DbtProjectYamlRenderer
     from dbt.config.utils import parse_cli_vars
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
     renderer = DbtProjectYamlRenderer(profile, cli_vars)
 
-    project_root = project.pop('project-root', os.getcwd())
+    project_root = project.pop("project-root", os.getcwd())
 
     partial = PartialProject.from_dicts(
         project_root=project_root,
@@ -79,19 +80,18 @@ def project_from_dict(project, profile, packages=None, selectors=None, cli_vars=
     return partial.render(renderer)
 
 
-
-def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars='{}'):
+def config_from_parts_or_dicts(project, profile, packages=None, selectors=None, cli_vars="{}"):
     from dbt.config import Project, Profile, RuntimeConfig
     from dbt.config.utils import parse_cli_vars
     from copy import deepcopy
+
     if not isinstance(cli_vars, dict):
         cli_vars = parse_cli_vars(cli_vars)
 
-
     if isinstance(project, Project):
         profile_name = project.profile_name
     else:
-        profile_name = project.get('profile')
+        profile_name = project.get("profile")
 
     if not isinstance(profile, Profile):
         profile = profile_from_dict(
@@ -111,16 +111,13 @@ def config_from_parts_or_dicts(project, profile, packages=None, selectors=None,
 
     args = Obj()
     args.vars = cli_vars
-    args.profile_dir = '/dev/null'
-    return RuntimeConfig.from_parts(
-        project=project,
-        profile=profile,
-        args=args
-    )
+    args.profile_dir = "/dev/null"
+    return RuntimeConfig.from_parts(project=project, profile=profile, args=args)
 
 
 def inject_plugin(plugin):
     from dbt.adapters.factory import FACTORY
+
     key = plugin.adapter.type()
     FACTORY.plugins[key] = plugin
 
@@ -128,6 +125,7 @@ def inject_plugin(plugin):
 def inject_plugin_for(config):
     # from dbt.adapters.postgres import Plugin, PostgresAdapter
     from dbt.adapters.factory import FACTORY
+
     FACTORY.load_plugin(config.credentials.type)
     adapter = FACTORY.get_adapter(config)
     return adapter
@@ -139,12 +137,14 @@ def inject_adapter(value, plugin):
     """
     inject_plugin(plugin)
     from dbt.adapters.factory import FACTORY
+
     key = value.type()
     FACTORY.adapters[key] = value
 
 
 def clear_plugin(plugin):
     from dbt.adapters.factory import FACTORY
+
     key = plugin.adapter.type()
     FACTORY.plugins.pop(key, None)
     FACTORY.adapters.pop(key, None)
@@ -164,7 +164,7 @@ def assert_from_dict(self, obj, dct, cls=None):
         if cls is None:
             cls = self.ContractType
         cls.validate(dct)
-        self.assertEqual(cls.from_dict(dct),  obj)
+        self.assertEqual(cls.from_dict(dct), obj)
 
     def assert_symmetric(self, obj, dct, cls=None):
         self.assert_to_dict(obj, dct)
@@ -187,7 +187,7 @@ def compare_dicts(dict1, dict2):
     common_keys = set(first_set).intersection(set(second_set))
     found_differences = False
     for key in common_keys:
-        if dict1[key] != dict2[key] :
+        if dict1[key] != dict2[key]:
             print(f"--- --- first dict: {key}: {str(dict1[key])}")
             print(f"--- --- second dict: {key}: {str(dict2[key])}")
             found_differences = True
@@ -202,7 +202,7 @@ def assert_from_dict(obj, dct, cls=None):
         cls = obj.__class__
     cls.validate(dct)
     obj_from_dict = cls.from_dict(dct)
-    if hasattr(obj, 'created_at'):
+    if hasattr(obj, "created_at"):
         obj_from_dict.created_at = 1
         obj.created_at = 1
     assert obj_from_dict == obj
@@ -210,10 +210,10 @@ def assert_from_dict(obj, dct, cls=None):
 
 def assert_to_dict(obj, dct):
     obj_to_dict = obj.to_dict(omit_none=True)
-    if 'created_at' in obj_to_dict:
-        obj_to_dict['created_at'] = 1
-    if 'created_at' in dct:
-        dct['created_at'] = 1
+    if "created_at" in obj_to_dict:
+        obj_to_dict["created_at"] = 1
+    if "created_at" in dct:
+        dct["created_at"] = 1
     assert obj_to_dict == dct
 
 
@@ -231,25 +231,26 @@ def assert_fails_validation(dct, cls):
 def generate_name_macros(package):
     from dbt.contracts.graph.parsed import ParsedMacro
     from dbt.node_types import NodeType
+
     name_sql = {}
-    for component in ('database', 'schema', 'alias'):
-        if component == 'alias':
-            source = 'node.name'
+    for component in ("database", "schema", "alias"):
+        if component == "alias":
+            source = "node.name"
         else:
-            source = f'target.{component}'
-        name = f'generate_{component}_name'
-        sql = f'{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}'
+            source = f"target.{component}"
+        name = f"generate_{component}_name"
+        sql = f"{{% macro {name}(value, node) %}} {{% if value %}} {{{{ value }}}} {{% else %}} {{{{ {source} }}}} {{% endif %}} {{% endmacro %}}"
         name_sql[name] = sql
 
     for name, sql in name_sql.items():
         pm = ParsedMacro(
             name=name,
             resource_type=NodeType.Macro,
-            unique_id=f'macro.{package}.{name}',
+            unique_id=f"macro.{package}.{name}",
             package_name=package,
-            original_file_path=normalize('macros/macro.sql'),
-            root_path='./dbt_packages/root',
-            path=normalize('macros/macro.sql'),
+            original_file_path=normalize("macros/macro.sql"),
+            root_path="./dbt_packages/root",
+            path=normalize("macros/macro.sql"),
             macro_sql=sql,
         )
         yield pm
@@ -258,6 +259,7 @@ def generate_name_macros(package):
 class TestAdapterConversions(TestCase):
     def _get_tester_for(self, column_type):
         from dbt.clients import agate_helper
+
         if column_type is agate.TimeDelta:  # dbt never makes this!
             return agate.TimeDelta()
 
@@ -265,10 +267,10 @@ def _get_tester_for(self, column_type):
             if type(instance) is column_type:
                 return instance
 
-        raise ValueError(f'no tester for {column_type}')
+        raise ValueError(f"no tester for {column_type}")
 
     def _make_table_of(self, rows, column_types):
-        column_names = list(string.ascii_letters[:len(rows[0])])
+        column_names = list(string.ascii_letters[: len(rows[0])])
         if isinstance(column_types, type):
             column_types = [self._get_tester_for(column_types) for _ in column_names]
         else:
@@ -277,50 +279,48 @@ def _make_table_of(self, rows, column_types):
         return table
 
 
-def MockMacro(package, name='my_macro', **kwargs):
+def MockMacro(package, name="my_macro", **kwargs):
     from dbt.contracts.graph.parsed import ParsedMacro
     from dbt.node_types import NodeType
 
     mock_kwargs = dict(
         resource_type=NodeType.Macro,
         package_name=package,
-        unique_id=f'macro.{package}.{name}',
-        original_file_path='/dev/null',
+        unique_id=f"macro.{package}.{name}",
+        original_file_path="/dev/null",
     )
 
     mock_kwargs.update(kwargs)
 
-    macro = mock.MagicMock(
-        spec=ParsedMacro,
-        **mock_kwargs
-    )
+    macro = mock.MagicMock(spec=ParsedMacro, **mock_kwargs)
     macro.name = name
     return macro
 
 
-def MockMaterialization(package, name='my_materialization', adapter_type=None, **kwargs):
+def MockMaterialization(package, name="my_materialization", adapter_type=None, **kwargs):
     if adapter_type is None:
-        adapter_type = 'default'
-    kwargs['adapter_type'] = adapter_type
-    return MockMacro(package, f'materialization_{name}_{adapter_type}', **kwargs)
+        adapter_type = "default"
+    kwargs["adapter_type"] = adapter_type
+    return MockMacro(package, f"materialization_{name}_{adapter_type}", **kwargs)
 
 
-def MockGenerateMacro(package, component='some_component', **kwargs):
-    name = f'generate_{component}_name'
+def MockGenerateMacro(package, component="some_component", **kwargs):
+    name = f"generate_{component}_name"
     return MockMacro(package, name=name, **kwargs)
 
 
 def MockSource(package, source_name, name, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedSourceDefinition
+
     src = mock.MagicMock(
         __class__=ParsedSourceDefinition,
         resource_type=NodeType.Source,
         source_name=source_name,
         package_name=package,
-        unique_id=f'source.{package}.{source_name}.{name}',
-        search_name=f'{source_name}.{name}',
-        **kwargs
+        unique_id=f"source.{package}.{source_name}.{name}",
+        search_name=f"{source_name}.{name}",
+        **kwargs,
     )
     src.name = name
     return src
@@ -329,6 +329,7 @@ def MockSource(package, source_name, name, **kwargs):
 def MockNode(package, name, resource_type=None, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedModelNode, ParsedSeedNode
+
     if resource_type is None:
         resource_type = NodeType.Model
     if resource_type == NodeType.Model:
@@ -336,14 +337,14 @@ def MockNode(package, name, resource_type=None, **kwargs):
     elif resource_type == NodeType.Seed:
         cls = ParsedSeedNode
     else:
-        raise ValueError(f'I do not know how to handle {resource_type}')
+        raise ValueError(f"I do not know how to handle {resource_type}")
     node = mock.MagicMock(
         __class__=cls,
         resource_type=resource_type,
         package_name=package,
-        unique_id=f'{str(resource_type)}.{package}.{name}',
+        unique_id=f"{str(resource_type)}.{package}.{name}",
         search_name=name,
-        **kwargs
+        **kwargs,
     )
     node.name = name
     return node
@@ -352,22 +353,23 @@ def MockNode(package, name, resource_type=None, **kwargs):
 def MockDocumentation(package, name, **kwargs):
     from dbt.node_types import NodeType
     from dbt.contracts.graph.parsed import ParsedDocumentation
+
     doc = mock.MagicMock(
         __class__=ParsedDocumentation,
         resource_type=NodeType.Documentation,
         package_name=package,
         search_name=name,
-        unique_id=f'{package}.{name}',
-        **kwargs
+        unique_id=f"{package}.{name}",
+        **kwargs,
     )
     doc.name = name
     return doc
 
 
-def load_internal_manifest_macros(config, macro_hook = lambda m: None):
+def load_internal_manifest_macros(config, macro_hook=lambda m: None):
     from dbt.parser.manifest import ManifestLoader
-    return ManifestLoader.load_macros(config, macro_hook)
 
+    return ManifestLoader.load_macros(config, macro_hook)
 
 
 def dict_replace(dct, **kwargs):
diff --git a/tox.ini b/tox.ini
index 0747b61f5..97017a926 100644
--- a/tox.ini
+++ b/tox.ini
@@ -2,7 +2,7 @@
 skipsdist = True
 envlist = unit, flake8, integration-spark-thrift
 
-[testenv:{unit,py37,py38,py39,py310,py}]
+[testenv:{unit,py38,py39,py310,py}]
 allowlist_externals =
     /bin/bash
 commands = /bin/bash -c '{envpython} -m pytest -v {posargs} tests/unit'
@@ -71,7 +71,7 @@ deps =
 [testenv:integration-spark-session]
 allowlist_externals =
     /bin/bash
-basepython = python3
+basepython = python3.10
 commands = /bin/bash -c '{envpython} -m pytest -v --profile spark_session {posargs} -n4 tests/functional/adapter/*'
 passenv =
     DBT_*