Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes join syntax for athena #25

Merged
merged 2 commits into from
Jun 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# TidierDB.jl updates

## v0.1.5 - 2024-06-05
- Adjusts Athena backend join syntax to match all other backends

## v0.1.4 - 2024-05-14
- Adds Google Big Query support
- use `connect` with GBQ JSON credentials and project id to establish a connection
Expand Down
35 changes: 1 addition & 34 deletions docs/examples/UserGuide/athena.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# To use the Athena AWS backend with TidierDB, set up and a few minimal but key syntax differences are covered here.
# This guide covers the setup and one small syntax difference for using the AWS Athena backend with TidierDB.

# ## Connecting
# Connection is established through AWS.jl as shown below.
Expand Down Expand Up @@ -62,37 +62,4 @@
# 2 │ 8 15.1
# ```

# ## Joining Syntax
# Since running queries requires athena_params to be passed, and all of the joins pull in the new table metadata with a query, when performing joins in Athena, the final argument of the join must be the Athena Parameters. This syntax difference will hopefully be resolved in the future.
# ```julia
# query = @chain from_query(mtcars) begin
# @group_by cyl
# @summarize begin
# across(mpg, (mean, minimum, maximum))
# num_cars = n()
# end
# @mutate begin
# efficiency = case_when(
# mean_mpg >= 25, "High",
# mean_mpg >= 15, "Moderate",
# "Low" )
# end
# end;

# @chain from_query(query) begin
# @full_join(demodb.mtcars, cyl, cyl, athena_params)
# @group_by(efficiency)
# @summarize(avg_hp = mean(hp))
# @collect
# end
# ```
# ```
# 2×2 DataFrame
# Row │ efficiency avg_hp
# │ String Float64
# ─────┼──────────────────────
# 1 │ High 82.6364
# 2 │ Moderate 180.238
# ```

# I would like to acknowledge the work of Manu Francis and this [blog post](https://medium.com/@manuedavakandam/beginners-guide-to-aws-athena-with-julia-a0192f7f4b4a), which helped guide this process.
2 changes: 1 addition & 1 deletion src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ function finalize_query(sqlquery::SQLQuery)
"FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ",
"SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", )

if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse
if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :athena || current_sql_mode[] == :gbq
complete_query = replace(complete_query, "\"" => "'", "==" => "=")
end

Expand Down
37 changes: 18 additions & 19 deletions src/joins_sq.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ end
"""
$docstring_left_join
"""
macro left_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro left_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -43,10 +43,10 @@ macro left_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -59,7 +59,7 @@ end
"""
$docstring_right_join
"""
macro right_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro right_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -90,10 +90,10 @@ macro right_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -107,7 +107,7 @@ end
"""
$docstring_inner_join
"""
macro inner_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro inner_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -138,10 +138,10 @@ macro inner_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -155,7 +155,7 @@ end
"""
$docstring_full_join
"""
macro full_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro full_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -186,10 +186,10 @@ macro full_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -203,7 +203,7 @@ end
"""
$docstring_semi_join
"""
macro semi_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro semi_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -234,10 +234,10 @@ macro semi_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -251,7 +251,7 @@ end
"""
$docstring_anti_join
"""
macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=nothing)
macro anti_join(sqlquery, join_table, lhs_column, rhs_column)
# Convert column references to string
lhs_col_str = string(lhs_column)
rhs_col_str = string(rhs_column)
Expand Down Expand Up @@ -282,10 +282,10 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
sq.from *= join_clause
end

if $(esc(athena_params)) === nothing
if current_sql_mode[] != :athena
new_metadata = get_table_metadata(sq.db, string($(esc(join_table))))
else
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), $(esc(athena_params)))
new_metadata = get_table_metadata_athena(sq.db, string($(esc(join_table))), sq.athena_params)
end
sq.metadata = vcat(sq.metadata, new_metadata)
else
Expand All @@ -294,4 +294,3 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
sq
end
end

Loading