From e2838c9b03cd8135020c62e4783ded363cce9190 Mon Sep 17 00:00:00 2001 From: Joel Labes Date: Tue, 29 Nov 2022 15:29:51 +1300 Subject: [PATCH] Recency truncate date option (#731) * WIP changing recency test * Add tests * cast to timestamp for bq * forgot the curlies * avoid lateral column aliasing * ts not dt * cast source as timestamp * don't cast inside test * cast as date instead of truncate * Update recency.sql * log bq events * store pg artifacts * int tests dir * Correctly store artifacts * try casting to date or datetime * order of operations more like order of ooperations * dt -> ts * Do I really have to cast this? * Revert "Do I really have to cast this?" This reverts commit 21e2c0d50a901551c94f5a29251c455f80bda9dc. --- .circleci/config.yml | 18 +++++++++++---- .../generic_tests/recency_time_excluded.sql | 12 ++++++++++ .../generic_tests/recency_time_included.sql | 4 ++++ .../models/generic_tests/schema.yml | 23 +++++++++++++++---- .../models/generic_tests/test_recency.sql | 16 ------------- macros/generic_tests/recency.sql | 14 +++++++---- 6 files changed, 57 insertions(+), 30 deletions(-) create mode 100644 integration_tests/models/generic_tests/recency_time_excluded.sql create mode 100644 integration_tests/models/generic_tests/recency_time_included.sql delete mode 100644 integration_tests/models/generic_tests/test_recency.sql diff --git a/.circleci/config.yml b/.circleci/config.yml index 5c0daa61..31e4a4d1 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -23,7 +23,9 @@ jobs: name: "Run OG Tests - Postgres" command: ./run_test.sh postgres - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target integration-redshift: docker: @@ -35,7 +37,9 @@ jobs: name: "Run OG Tests - Redshift" command: ./run_test.sh redshift - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target integration-snowflake: docker: @@ -47,8 +51,10 @@ jobs: name: "Run OG Tests - Snowflake" command: ./run_test.sh snowflake - store_artifacts: - path: ./logs - + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target + integration-bigquery: environment: BIGQUERY_SERVICE_KEY_PATH: "/home/circleci/bigquery-service-key.json" @@ -64,7 +70,9 @@ jobs: name: "Run OG Tests - BigQuery" command: ./run_test.sh bigquery - store_artifacts: - path: ./logs + path: integration_tests/logs + - store_artifacts: + path: integration_tests/target workflows: version: 2 diff --git a/integration_tests/models/generic_tests/recency_time_excluded.sql b/integration_tests/models/generic_tests/recency_time_excluded.sql new file mode 100644 index 00000000..1c18e800 --- /dev/null +++ b/integration_tests/models/generic_tests/recency_time_excluded.sql @@ -0,0 +1,12 @@ +with yesterday_time as ( +select + 1 as col1, + 2 as col2, + {{ dbt.dateadd('day', -1, dbt.current_timestamp()) }} as created_at +) + +select + col1, + col2, + {{ dbt.date_trunc('day', 'created_at') }} as created_at +from yesterday_time \ No newline at end of file diff --git a/integration_tests/models/generic_tests/recency_time_included.sql b/integration_tests/models/generic_tests/recency_time_included.sql new file mode 100644 index 00000000..93fe1db1 --- /dev/null +++ b/integration_tests/models/generic_tests/recency_time_included.sql @@ -0,0 +1,4 @@ +select + 1 as col1, + 2 as col2, + cast({{ dbt.dateadd('hour', -23, dbt.current_timestamp()) }} as {{ dbt.type_timestamp() }}) as created_at diff --git a/integration_tests/models/generic_tests/schema.yml b/integration_tests/models/generic_tests/schema.yml index 8be6c16a..96a46a3b 100644 --- a/integration_tests/models/generic_tests/schema.yml +++ b/integration_tests/models/generic_tests/schema.yml @@ -143,23 +143,38 @@ seeds: at_least: 0.9 models: - - name: test_recency + - name: recency_time_included tests: - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 group_by_columns: ['col1'] - dbt_utils.recency: datepart: day - field: today + field: created_at interval: 1 group_by_columns: ['col1', 'col2'] + - name: recency_time_excluded + tests: + - dbt_utils.recency: + datepart: day + field: created_at + interval: 1 + ignore_time_component: true + - dbt_utils.recency: + datepart: day + field: created_at + interval: 1 + ignore_time_component: false + error_if: "<1" #sneaky way to ensure that the test is returning failing rows + warn_if: "<0" + - name: test_equal_rowcount tests: - dbt_utils.equal_rowcount: diff --git a/integration_tests/models/generic_tests/test_recency.sql b/integration_tests/models/generic_tests/test_recency.sql deleted file mode 100644 index c0fe0fff..00000000 --- a/integration_tests/models/generic_tests/test_recency.sql +++ /dev/null @@ -1,16 +0,0 @@ - -{% if target.type == 'postgres' %} - -select - 1 as col1, - 2 as col2, - {{ date_trunc('day', current_timestamp_backcompat()) }} as today - -{% else %} - -select - 1 as col1, - 2 as col2, - cast({{ date_trunc('day', current_timestamp_backcompat()) }} as datetime) as today - -{% endif %} \ No newline at end of file diff --git a/macros/generic_tests/recency.sql b/macros/generic_tests/recency.sql index d33ab134..7fe2cafd 100644 --- a/macros/generic_tests/recency.sql +++ b/macros/generic_tests/recency.sql @@ -1,10 +1,10 @@ -{% test recency(model, field, datepart, interval, group_by_columns = []) %} - {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, group_by_columns)) }} +{% test recency(model, field, datepart, interval, ignore_time_component=False, group_by_columns = []) %} + {{ return(adapter.dispatch('test_recency', 'dbt_utils')(model, field, datepart, interval, ignore_time_component, group_by_columns)) }} {% endtest %} -{% macro default__test_recency(model, field, datepart, interval, group_by_columns) %} +{% macro default__test_recency(model, field, datepart, interval, ignore_time_component, group_by_columns) %} -{% set threshold = dbt.dateadd(datepart, interval * -1, current_timestamp_backcompat()) %} +{% set threshold = 'cast(' ~ dbt.dateadd(datepart, interval * -1, dbt.current_timestamp()) ~ ' as ' ~ ('date' if ignore_time_component else dbt.type_timestamp()) ~ ')' %} {% if group_by_columns|length() > 0 %} {% set select_gb_cols = group_by_columns|join(' ,') + ', ' %} @@ -17,7 +17,11 @@ with recency as ( select {{ select_gb_cols }} - max({{field}}) as most_recent + {% if ignore_time_component %} + cast(max({{ field }}) as date) as most_recent + {%- else %} + max({{ field }}) as most_recent + {%- endif %} from {{ model }}