-
Notifications
You must be signed in to change notification settings - Fork 149
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor incremental materialization for readability
- Loading branch information
Showing
5 changed files
with
213 additions
and
205 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
21 changes: 21 additions & 0 deletions
21
dbt/include/bigquery/macros/materializations/incremental_strategy/common.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{% macro build_partition_time_exp(partition_by) %}
    {#-
      Return a dict describing the partition column as a timestamp expression.

      'field' is always `partition_by.field`. 'value' is that same field when
      `partition_by.data_type` is exactly 'timestamp'; for any other data type
      it is the field wrapped in a `timestamp(...)` call.
    -#}
    {% set field_name = partition_by.field %}
    {% set partition_value = field_name if partition_by.data_type == 'timestamp' else 'timestamp(' + field_name + ')' %}
    {{ return({'value': partition_value, 'field': field_name}) }}
{% endmacro %}
|
||
{% macro declare_dbt_max_partition(relation, partition_by, sql) %}
    {#-
      Emit a `declare _dbt_max_partition ...` statement, but only when the
      string '_dbt_max_partition' occurs somewhere in `sql`; otherwise nothing
      is rendered.

      relation:     table whose largest non-null partition value becomes the
                    variable's default.
      partition_by: supplies `data_type` (the variable's type) and `field`
                    (the column passed to max()).
      sql:          text that is searched for the variable name.

      The query reads from the `relation` argument rather than a hard-coded
      `this`, so the parameter is actually honored by the rendered statement.
    -#}
    {% if '_dbt_max_partition' in sql %}

    declare _dbt_max_partition {{ partition_by.data_type }} default (
        select max({{ partition_by.field }}) from {{ relation }}
        where {{ partition_by.field }} is not null
    );

    {% endif %}

{% endmacro %}
91 changes: 91 additions & 0 deletions
91
dbt/include/bigquery/macros/materializations/incremental_strategy/insert_overwrite.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
{% macro bq_generate_incremental_insert_overwrite_build_sql(
    tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, on_schema_change
) %}
    {#-
      Entry point for the `insert_overwrite` strategy.

      Raises a compiler error when `partition_by` is none. Otherwise returns
      whatever bq_insert_overwrite renders for the same arguments, passed
      through in the same order.

      NOTE(review): `on_schema_change` is forwarded positionally into the slot
      that bq_insert_overwrite names `tmp_relation_exists` -- confirm what
      callers actually pass in this position.
    -#}
    {% if partition_by is none %}
        {% do exceptions.raise_compiler_error(
            "The 'insert_overwrite' strategy requires the `partition_by` config."
        ) %}
    {% endif %}

    {{ return(bq_insert_overwrite(
        tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, on_schema_change
    )) }}

{% endmacro %}
|
||
{% macro bq_insert_overwrite(
    tmp_relation, target_relation, sql, unique_key, partition_by, partitions, dest_columns, tmp_relation_exists
) %}
    {#-
      Render the SQL for the `insert_overwrite` incremental strategy.

      Static mode (`partitions` is neither none nor an empty list): renders one
      get_insert_overwrite_merge_sql call whose predicate limits the
      destination to the listed partitions and whose source is the model SQL
      (wrapped for time-ingestion partitioning when that flag is set).

      Dynamic mode (otherwise): renders a script that
        1. creates the temp table from `sql`, unless `tmp_relation_exists`,
        2. stores the distinct partition values found in the temp table in
           `dbt_partitions_for_replacement`,
        3. calls get_insert_overwrite_merge_sql with the temp table as source,
        4. drops the temp table.

      `unique_key` is accepted but not referenced in this macro.
    -#}

    {% if partitions is not none and partitions != [] %} {# static #}

        {% set predicate -%}
            {{ partition_by.render_wrapped(alias='DBT_INTERNAL_DEST') }} in (
                {{ partitions | join (', ') }}
            )
        {%- endset %}

        {#- Render the model query separately so the wrapper below controls the line breaks around it. -#}
        {%- set model_sql -%}
            {%- if partition_by.time_ingestion_partitioning -%}
            {{ wrap_with_time_ingestion_partitioning(build_partition_time_exp(partition_by), sql, True) }}
            {%- else -%}
            {{ sql }}
            {%- endif -%}
        {%- endset -%}

        {#- The closing paren must start a new line: if the model SQL ends in a `--` comment, a paren on the same line would be commented out. -#}
        {%- set source_sql -%}
        (
            {{ model_sql }}
        )
        {%- endset -%}

        {{ get_insert_overwrite_merge_sql(target_relation, source_sql, dest_columns, [predicate], include_sql_header=true) }}

    {% else %} {# dynamic #}

        {% set predicate -%}
            {{ partition_by.render_wrapped(alias='DBT_INTERNAL_DEST') }} in unnest(dbt_partitions_for_replacement)
        {%- endset %}

        {%- set source_sql -%}
        (
            select
            {% if partition_by.time_ingestion_partitioning -%}
            _PARTITIONTIME,
            {%- endif -%}
            * from {{ tmp_relation }}
        )
        {%- endset -%}

        declare dbt_partitions_for_replacement array<{{ partition_by.data_type }}>;

        {# have we already created the temp table to check for schema changes? #}
        {% if not tmp_relation_exists %}
            {{ declare_dbt_max_partition(this, partition_by, sql) }}

            -- 1. create a temp table
            {% set create_table_sql = bq_create_table_as(partition_by.time_ingestion_partitioning, True, tmp_relation, sql) %}
            {{ create_table_sql }}
        {% else %}
            -- 1. temp table already exists, we used it to check for schema changes
        {% endif %}

        -- 2. define partitions to update
        set (dbt_partitions_for_replacement) = (
            select as struct
                array_agg(distinct {{ partition_by.render_wrapped() }})
            from {{ tmp_relation }}
        );

        {#
          TODO: include_sql_header is a hack; consider a better approach that includes
                the sql_header at the materialization-level instead
        #}
        -- 3. run the merge statement
        {{ get_insert_overwrite_merge_sql(target_relation, source_sql, dest_columns, [predicate], include_sql_header=false) }};

        -- 4. clean up the temp table
        drop table if exists {{ tmp_relation }}

    {% endif %}

{% endmacro %}
28 changes: 28 additions & 0 deletions
28
dbt/include/bigquery/macros/materializations/incremental_strategy/merge.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{% macro bq_generate_incremental_merge_build_sql(
    tmp_relation, target_relation, sql, unique_key, partition_by, dest_columns, tmp_relation_exists
) %}
    {#-
      Build the statement for the `merge` incremental strategy and return the
      result of get_merge_sql(target_relation, source, unique_key, dest_columns).

      The merge source is a parenthesized subquery chosen as follows:
        - `tmp_relation_exists` is truthy: `select * from tmp_relation`, with
          `_PARTITIONTIME` selected first when
          `partition_by.time_ingestion_partitioning` is set;
        - otherwise, with time-ingestion partitioning: the output of
          wrap_with_time_ingestion_partitioning for the model SQL;
        - otherwise: the model SQL as given.
    -#}
    {%- set inner_sql -%}
        {%- if tmp_relation_exists -%}
        select
        {% if partition_by.time_ingestion_partitioning -%}
        _PARTITIONTIME,
        {%- endif -%}
        * from {{ tmp_relation }}
        {%- elif partition_by.time_ingestion_partitioning -%}
        {{ wrap_with_time_ingestion_partitioning(build_partition_time_exp(partition_by), sql, True) }}
        {%- else -%}
        {{ sql }}
        {%- endif -%}
    {%- endset -%}

    {#- Wrap in parens to make it a subquery. The closing paren must start a new line: if the model SQL ends in a `--` comment, a paren on the same line would be commented out. -#}
    {%- set source_sql -%}
    (
        {{ inner_sql }}
    )
    {%- endset -%}

    {% set build_sql = get_merge_sql(target_relation, source_sql, unique_key, dest_columns) %}

    {{ return(build_sql) }}

{% endmacro %}
Oops, something went wrong.