Skip to content

Commit

Permalink
Merge pull request #21 from TidierOrg/gbq-support
Browse files Browse the repository at this point in the history
adds Google Big Query querying and `connect` support
  • Loading branch information
drizk1 committed May 14, 2024
2 parents 07d6c3a + ca524f4 commit b5da180
Show file tree
Hide file tree
Showing 10 changed files with 353 additions and 25 deletions.
4 changes: 4 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# TidierDB.jl updates

## v0.1.4 - 2024-05-TBD
- Adds Google Big Query support
- use `connect` with BGQ JSON credentials and project id establish connection

## v0.1.3 - 2024-05-09
- Adds `@full_join`, `@semi_join`, `@anti_join`
- Fixes bug to allow joining tables for Athena backend
Expand Down
8 changes: 6 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierDB"
uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b"
authors = ["Daniel Rizk <rizk.daniel.12@gmail.com> and contributors"]
version = "0.1.3"
version = "0.1.4"

[deps]
AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
Expand All @@ -11,6 +11,8 @@ ClickHouse = "82f2e89e-b495-11e9-1d9d-fb40d7cf2130"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
DuckDB = "d2f5444f-75bc-4fdf-ac35-56f514c445e1"
GoogleCloud = "55e21f81-8b0a-565e-b5ad-6816892a5ee7"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
LibPQ = "194296ae-ab2e-5f79-8cd4-7183a0a5a0d1"
MacroTools = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
MySQL = "39abe10b-433b-5dbd-92d4-e302a9df00cd"
Expand All @@ -19,14 +21,16 @@ Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9"

[compat]
Arrow = "2.7"
AWS = "1.9"
Arrow = "2.7"
Chain = "0.6"
ClickHouse = "0.2"
DataFrames = "1.5"
Documenter = "0.27, 1"
DuckDB = "0.10"
LibPQ = "1.17"
JSON3 = "1.1"
GoogleCloud = "0.11"
MacroTools = "0.5"
MySQL = "1.4"
ODBC = "1.1"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL
- MSSQL `set_sql_mode(:mssql)`
- Postgres `set_sql_mode(:postgres)`
- Athena `set_sql_mode(:athena)`
- Google Big Query `set_sql_mode(:gbq)`

The style of SQL that is generated can be modified using `set_sql_mode()`.

Expand Down
2 changes: 1 addition & 1 deletion docs/examples/UserGuide/from_queryex.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
# ```julia
# import TidierDB as DB
# con = DB.connect(:duckdb)
# DB.copy_to(con, "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv", mtcars2)
# DB.copy_to(con, "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv", "mtcars2")
# ```

# Start a query to analyze fuel efficiency by number of cylinders. However, to further build on this query later, end the chain without using `@show_query` or `@collect`
Expand Down
1 change: 1 addition & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL
- MSSQL `set_sql_mode(:mssql)`
- Postgres `set_sql_mode(:postgres)`
- Athena `set_sql_mode(:athena)`
- Google Big Query `set_sql_mode(:gbq)`

The style of SQL that is generated can be modified using `set_sql_mode()`.

Expand Down
3 changes: 2 additions & 1 deletion src/TBD_macros.jl
Original file line number Diff line number Diff line change
Expand Up @@ -660,11 +660,12 @@ macro collect(sqlquery)
if db isa SQLite.DB || db isa LibPQ.Connection || db isa DuckDB.Connection || db isa MySQL.Connection || db isa ODBC.Connection
result = DBInterface.execute(db, final_query)
df_result = DataFrame(result)

elseif current_sql_mode[] == :clickhouse
df_result = ClickHouse.select_df(db, final_query)
selected_columns_order = sq.metadata[sq.metadata.current_selxn .== 1, :name]
df_result = df_result[:, selected_columns_order]
elseif db isa GoogleSession{JSONCredentials}
df_result = collect_gbq(sq.db, final_query)
elseif current_sql_mode[] == :athena
exe_query = Athena.start_query_execution(final_query, sq.athena_params; aws_config = db)
status = "RUNNING"
Expand Down
7 changes: 6 additions & 1 deletion src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ using ODBC
import ClickHouse
using Arrow
using AWS
using JSON3
using GoogleCloud

@reexport using DataFrames: DataFrame
@reexport using Chain
Expand All @@ -36,6 +38,7 @@ include("parsing_mysql.jl")
include("parsing_mssql.jl")
include("parsing_clickhouse.jl")
include("parsing_athena.jl")
include("parsing_gbq.jl")
include("joins_sq.jl")
include("slices_sq.jl")

Expand Down Expand Up @@ -63,6 +66,8 @@ function expr_to_sql(expr, sq; from_summarize::Bool = false)
return expr_to_sql_clickhouse(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :athena
return expr_to_sql_trino(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :gbq
return expr_to_sql_gbq(expr, sq; from_summarize=from_summarize)
else
error("Unsupported SQL mode: $(current_sql_mode[])")
end
Expand Down Expand Up @@ -233,7 +238,7 @@ function db_table(db, table, athena_params::Any=nothing)
table_name = string(table)
metadata = if current_sql_mode[] == :lite
get_table_metadata(db, table_name)
elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse
elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :gbq
get_table_metadata(db, table_name)
elseif current_sql_mode[] == :athena
get_table_metadata_athena(db, table_name, athena_params)
Expand Down
2 changes: 2 additions & 0 deletions src/docstrings.jl
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,8 @@ This function establishes a database connection based on the specified backend a
# conn = connect(:clickhouse; host="localhost", port=9000, database="mydb", user="default", password="")
# Connect to SQLite
# conn = connect(:lite)
# Connect to Google Big Query
# conn = connect(:gbq, "json_user_key_path", "project_id")
# Connect to DuckDB
julia> db = connect(:duckdb)
DuckDB.Connection(":memory:")
Expand Down
51 changes: 31 additions & 20 deletions src/joins_sq.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
function gbq_join_parse(input)
input = string(input)
parts = split(input, ".")
if current_sql_mode[] == :gbq && length(parts) >=2
return join(parts[2:end], ".")
else
return input
end
end


"""
$docstring_left_join
"""
Expand All @@ -18,8 +29,8 @@ macro left_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" LEFT JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -28,7 +39,7 @@ macro left_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
# Update the FROM clause
sq.from = cte_name
else
join_clause = " LEFT JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " LEFT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand Down Expand Up @@ -65,8 +76,8 @@ macro right_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" RIGHT JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" RIGHT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -75,7 +86,7 @@ macro right_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not
# Update the FROM clause
sq.from = cte_name
else
join_clause = " RIGHT JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " RIGHT JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand All @@ -93,7 +104,6 @@ macro right_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not
end



"""
$docstring_inner_join
"""
Expand All @@ -114,8 +124,8 @@ macro inner_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" INNER JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" INNER JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -124,7 +134,7 @@ macro inner_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=not
# Update the FROM clause
sq.from = cte_name
else
join_clause = " INNER JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " INNER JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand Down Expand Up @@ -162,8 +172,8 @@ macro full_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" FULL JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -172,7 +182,7 @@ macro full_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
# Update the FROM clause
sq.from = cte_name
else
join_clause = " FULL JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " FULL JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand Down Expand Up @@ -210,8 +220,8 @@ macro semi_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" SEMI JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" SEMI JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -220,7 +230,7 @@ macro semi_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
# Update the FROM clause
sq.from = cte_name
else
join_clause = " SEMI JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " SEMI JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand Down Expand Up @@ -258,8 +268,8 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth

most_recent_source = !isempty(sq.ctes) ? "cte_" * string(sq.cte_count - 1) : sq.from

join_sql = " " * most_recent_source * ".*, " * string($(esc(join_table))) * ".* FROM " * most_recent_source *
" ANTI JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", most_recent_source, ".", $rhs_col_str)
join_sql = " " * most_recent_source * ".*, " * string(gbq_join_parse($(esc(join_table)))) * ".* FROM " * gbq_join_parse(most_recent_source) *
" ANTI JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(most_recent_source), ".", $rhs_col_str)

# Create and add the new CTE
new_cte = CTE(name=cte_name, select=join_sql)
Expand All @@ -268,7 +278,7 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
# Update the FROM clause
sq.from = cte_name
else
join_clause = " ANTI JOIN " * string($(esc(join_table))) * " ON " * string($(esc(join_table)), ".", $lhs_col_str, " = ", sq.from, ".", $rhs_col_str)
join_clause = " ANTI JOIN " * string($(esc(join_table))) * " ON " * string(gbq_join_parse($(esc(join_table))), ".", $lhs_col_str, " = ", gbq_join_parse(sq.from), ".", $rhs_col_str)
sq.from *= join_clause
end

Expand All @@ -283,4 +293,5 @@ macro anti_join(sqlquery, join_table, lhs_column, rhs_column, athena_params=noth
end
sq
end
end
end

Loading

0 comments on commit b5da180

Please sign in to comment.