From 341756bef474b00d25ca26398c71a4754f377c96 Mon Sep 17 00:00:00 2001 From: drizk1 Date: Fri, 21 Jun 2024 15:17:28 -0400 Subject: [PATCH] fix case sensitivity issue with snowflake vs tidierdb metadata --- NEWS.md | 3 +++ Project.toml | 2 +- docs/examples/UserGuide/Snowflake.jl | 3 +-- src/TBD_macros.jl | 11 +++++++---- src/db_parsing.jl | 13 +++++++++++++ 5 files changed, 25 insertions(+), 7 deletions(-) diff --git a/NEWS.md b/NEWS.md index 85258fa..42613cb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # TidierDB.jl updates +## v0.2.0 - 2024-06-21 +- Fixes case sensitivity with TidierDB metadata to make queries case insensitive when using Snowflake + ## v0.1.9 - 2024-06-20 - Small fix to internal `finalize_query` function for Snowflake diff --git a/Project.toml b/Project.toml index 4e07271..b54b8fb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "TidierDB" uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b" authors = ["Daniel Rizk and contributors"] -version = "0.1.9" +version = "0.2.0" [deps] AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc" diff --git a/docs/examples/UserGuide/Snowflake.jl b/docs/examples/UserGuide/Snowflake.jl index 9369186..a633225 100644 --- a/docs/examples/UserGuide/Snowflake.jl +++ b/docs/examples/UserGuide/Snowflake.jl @@ -8,9 +8,8 @@ # - Schema Name # - Compute Warehouse name -# Three things to note: +# Two things to note: # - Your OAuth Token may frequently expire, which may require you to rerun your connection line. -# - For the time being, to properly track columns in the local metadata, you must write them using ALL CAPS - this will likely be addressed and rectified in the future # - Since each time `db_table` runs, it runs a query to pull the metadata, you may choose to use run `db_table` and save the results, and use these results with`from_query()` # - This will reduce the number of queries to your database # - Allow you to build a a SQL query and `@show_query` even if the OAuth_token has expired. To `@collect` you will have to reconnect and rerun db_table if your OAuth token has expired diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl index 3037c94..0211597 100644 --- a/src/TBD_macros.jl +++ b/src/TBD_macros.jl @@ -7,7 +7,6 @@ macro select(sqlquery, exprs...) return quote exprs_str = map(expr -> isa(expr, Symbol) ? string(expr) : expr, $exprs) - let columns = parse_tidy_db($exprs_str, $(esc(sqlquery)).metadata) columns_str = join(["SELECT ", join([string(column) for column in columns], ", ")]) $(esc(sqlquery)).select = columns_str @@ -159,6 +158,9 @@ end function process_mutate_expression(expr, sq, select_expressions, cte_name) if isa(expr, Expr) && expr.head == :(=) && isa(expr.args[1], Symbol) col_name = string(expr.args[1]) + if current_sql_mode[] == :snowflake + col_name = uppercase(col_name) + end col_expr = expr_to_sql(expr.args[2], sq) # Convert to SQL expression # Determine whether the column already exists or needs to be added @@ -371,6 +373,9 @@ function process_summary_expression(expr, sq, summary_str) summary_operation = string(summary_operation) summary_column = expr_to_sql(expr.args[1], sq, from_summarize = true) summary_column = string(summary_column) + if current_sql_mode[] == :snowflake + summary_column = uppercase(summary_column) + end push!(sq.metadata, Dict("name" => summary_column, "type" => "UNKNOWN", "current_selxn" => 1, "table_name" => sq.from)) push!(summary_str, summary_operation * " AS " * summary_column) @@ -665,9 +670,7 @@ macro collect(sqlquery) selected_columns_order = sq.metadata[sq.metadata.current_selxn .== 1, :name] df_result = df_result[:, selected_columns_order] elseif db isa GoogleSession{JSONCredentials} - df_result = collect_gbq(sq.db, final_query) - elseif current_sql_mode[] == :snowflake - df_result = execute_snowflake(db, final_query) + df_result = collect_gbq(sq.db, final_query) elseif current_sql_mode[] == :athena exe_query = Athena.start_query_execution(final_query, sq.athena_params; aws_config = db) status = "RUNNING" diff --git a/src/db_parsing.jl b/src/db_parsing.jl index 49325e1..0a7d6ed 100644 --- a/src/db_parsing.jl +++ b/src/db_parsing.jl @@ -40,7 +40,13 @@ function parse_tidy_db(exprs, metadata::DataFrame) if actual_expr.args[1] == :(:) # Handle range expression start_col = string(actual_expr.args[2]) + if current_sql_mode[] == :snowflake + start_col = uppercase(start_col) + end end_col = string(actual_expr.args[3]) + if current_sql_mode[] == :snowflake + end_col = uppercase(end_col) + end start_idx = findfirst(==(start_col), all_columns) end_idx = findfirst(==(end_col), all_columns) if isnothing(start_idx) || isnothing(end_idx) || start_idx > end_idx @@ -55,6 +61,9 @@ function parse_tidy_db(exprs, metadata::DataFrame) elseif actual_expr.args[1] == :starts_with || actual_expr.args[1] == :ends_with || actual_expr.args[1] == :contains # Handle starts_with, ends_with, and contains substring = actual_expr.args[2] + if current_sql_mode[] == :snowflake + substring = uppercase(substring) + end match_columns = filter(col -> (actual_expr.args[1] == :starts_with && startswith(col, substring)) || (actual_expr.args[1] == :ends_with && endswith(col, substring)) || @@ -74,7 +83,11 @@ function parse_tidy_db(exprs, metadata::DataFrame) push!(included_columns, string(actual_expr)) continue end + col_name = isa(actual_expr, Symbol) ? string(actual_expr) : actual_expr + if current_sql_mode[] == :snowflake + col_name = uppercase(col_name) + end if is_excluded push!(excluded_columns, col_name) else