Skip to content

Commit

Permalink
Merge pull request #29 from TidierOrg/oracle_odbc-support
Browse files Browse the repository at this point in the history
add oracle support via odbc
  • Loading branch information
drizk1 committed Jun 17, 2024
2 parents acaa225 + ab2570e commit 7b03379
Show file tree
Hide file tree
Showing 7 changed files with 206 additions and 14 deletions.
5 changes: 4 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# TidierDB.jl updates

## v0.1.7 - 2024-06-17
- Adds support for Oracle backend via ODBC.jl connection

## v0.1.6 - 2024-06-11
- Adds `@interpolate` and documentation around building macros with TidierDB chains and interpolation

Expand All @@ -19,7 +22,7 @@
- Interpolation bug fix to allow interpolating vector of strings with the syntax: `@filter(column_name in [!!vector]) `

## v0.1.2 - 2024-05-07
- Adds AWS Athena backend support
- Adds AWS Athena backend support via AWS.jl

## v0.1.1 - 2024-04-12
- Fixes metadata retrieval for MariaDB
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TidierDB"
uuid = "86993f9b-bbba-4084-97c5-ee15961ad48b"
authors = ["Daniel Rizk <rizk.daniel.12@gmail.com> and contributors"]
version = "0.1.6"
version = "0.1.7"

[deps]
AWS = "fbe9abb3-538b-5e4e-ba9e-bc94f4f92ebc"
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL
- Postgres `set_sql_mode(:postgres)`
- Athena `set_sql_mode(:athena)`
- Google Big Query `set_sql_mode(:gbq)`
- Oracle `set_sql_mode(:oracle)`

The style of SQL that is generated can be modified using `set_sql_mode()`.

Expand Down
3 changes: 2 additions & 1 deletion docs/examples/UserGuide/getting_started.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

# Alternatively, `using Tidier` will import TidierDB in the above manner for you, where TidierDB functions and macros will be available as `DB.@mutate()` and so on, and the TidierData equivalent would be `@mutate()`.

# There are two ways to connect to the database. you can use `connect` without any need to load any additional packages.
# There are two ways to connect to the database. You can use `connect` without any need to load any additional packages. However, Oracle and Athena do not support this method yet and will require you to load in ODBC.jl or AWS.jl respectively.

# For example
# Connecting to MySQL
Expand All @@ -29,5 +29,6 @@
# - Postgres: LibPQ.jl
# - SQLite: SQLite.jl
# - Athena: AWS.jl
# - Oracle: ODBC.jl

# For DuckDB, SQLite, and MySQL, `copy_to()` lets you copy data to the database and query there. ClickHouse, MSSQL, and Postgres support for `copy_to()` has not been added yet.
2 changes: 2 additions & 0 deletions docs/src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ The main goal of TidierDB.jl is to bring the syntax of Tidier.jl to multiple SQL
- Postgres `set_sql_mode(:postgres)`
- Athena `set_sql_mode(:athena)`
- Google Big Query `set_sql_mode(:gbq)`
- Oracle `set_sql_mode(:oracle)`


The style of SQL that is generated can be modified using `set_sql_mode()`.

Expand Down
33 changes: 22 additions & 11 deletions src/TidierDB.jl
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ include("parsing_mssql.jl")
include("parsing_clickhouse.jl")
include("parsing_athena.jl")
include("parsing_gbq.jl")
include("parsing_oracle.jl")
include("joins_sq.jl")
include("slices_sq.jl")

Expand Down Expand Up @@ -68,6 +69,8 @@ function expr_to_sql(expr, sq; from_summarize::Bool = false)
return expr_to_sql_trino(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :gbq
return expr_to_sql_gbq(expr, sq; from_summarize=from_summarize)
elseif current_sql_mode[] == :oracle
return expr_to_sql_oracle(expr, sq; from_summarize=from_summarize)
else
error("Unsupported SQL mode: $(current_sql_mode[])")
end
Expand Down Expand Up @@ -131,7 +134,7 @@ function finalize_query(sqlquery::SQLQuery)
"FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ",
"SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", )

if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :athena || current_sql_mode[] == :gbq
if current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :athena || current_sql_mode[] == :gbq || current_sql_mode[] == :oracle
complete_query = replace(complete_query, "\"" => "'", "==" => "=")
end

Expand Down Expand Up @@ -200,20 +203,28 @@ end

# MSSQL
function get_table_metadata(conn::ODBC.Connection, table_name::String)
# Query to get column names and types from INFORMATION_SCHEMA
query = """
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_name = '$table_name'
ORDER BY ordinal_position;
"""
if current_sql_mode[] == :oracle
table_name = uppercase(table_name)
query = """
SELECT column_name, data_type
FROM all_tab_columns
WHERE table_name = '$table_name'
ORDER BY column_id
"""
else
query = """
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_name = '$table_name'
ORDER BY ordinal_position;
"""
end

result = DBInterface.execute(conn, query) |> DataFrame
#result[!, :DATA_TYPE] = map(x -> String(x), result.DATA_TYPE)
result[!, :current_selxn] .= 1
result[!, :table_name] .= table_name
# Adjust the select statement to include the new table_name column
return select(result, :column_name => :name, :data_type => :type, :current_selxn, :table_name,)
return select(result, :column_name => :name, :data_type => :type, :current_selxn, :table_name)
end

# ClickHouse
Expand All @@ -238,7 +249,7 @@ function db_table(db, table, athena_params::Any=nothing)
table_name = string(table)
metadata = if current_sql_mode[] == :lite
get_table_metadata(db, table_name)
elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :gbq
elseif current_sql_mode[] == :postgres || current_sql_mode[] == :duckdb || current_sql_mode[] == :mysql || current_sql_mode[] == :mssql || current_sql_mode[] == :clickhouse || current_sql_mode[] == :gbq || current_sql_mode[] == :oracle
get_table_metadata(db, table_name)
elseif current_sql_mode[] == :athena
get_table_metadata_athena(db, table_name, athena_params)
Expand Down
174 changes: 174 additions & 0 deletions src/parsing_oracle.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
function expr_to_sql_oracle(expr, sq; from_summarize::Bool)
expr = parse_char_matching(expr)
expr = exc_capture_bug(expr, names_to_modify)
MacroTools.postwalk(expr) do x
# Handle basic arithmetic and functions
if @capture(x, a_ + b_)
return :($a + $b)
elseif @capture(x, a_ - b_)
return :($a - $b)
elseif @capture(x, a_ * b_)
return :($a * $b)
elseif @capture(x, a_ / b_)
return :($a / $b)
elseif @capture(x, a_ ^ b_)
return :(POWER($a, $b))
elseif @capture(x, round(a_))
return :(ROUND($a))
elseif @capture(x, round(a_, b_))
return :(ROUND($a, $b))
elseif @capture(x, mean(a_))
if from_summarize
return :(AVG($a))
else
window_clause = construct_window_clause(sq)
return "AVG($(string(a))) $(window_clause)"
end
elseif @capture(x, minimum(a_))
if from_summarize
return :(MIN($a))
else
window_clause = construct_window_clause(sq)
return "MIN($(string(a))) $(window_clause)"
end
elseif @capture(x, maximum(a_))
if from_summarize
return :(MAX($a))
else
window_clause = construct_window_clause(sq)
return "MAX($(string(a))) $(window_clause)"
end
elseif @capture(x, sum(a_))
if from_summarize
return :(SUM($a))
else
window_clause = construct_window_clause(sq)
return "SUM($(string(a))) $(window_clause)"
end
elseif @capture(x, cumsum(a_))
if from_summarize
error("cumsum is only available through a windowed @mutate")
else
# sq.windowFrame = "ROWS UNBOUNDED PRECEDING "
window_clause = construct_window_clause(sq, from_cumsum = true)
return "SUM($(string(a))) $(window_clause)"
end
#stats agg
elseif @capture(x, std(a_))
if from_summarize
return :(STDDEV_SAMP($a))
else
window_clause = construct_window_clause(sq, )
return "STDDEV_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, cor(a_, b_))
if from_summarize
return :(CORR($a))
else
window_clause = construct_window_clause(sq)
return "CORR($(string(a))) $(window_clause)"
end
elseif @capture(x, cov(a_, b_))
if from_summarize
return :(COVAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "COVAR_SAMP($(string(a))) $(window_clause)"
end
elseif @capture(x, var(a_))
if from_summarize
return :(VAR_SAMP($a))
else
window_clause = construct_window_clause(sq)
return "VAR_SAMP($(string(a))) $(window_clause)"
end
#elseif @capture(x, sql_agg(str_))
# if from_summarize
# return error("sql_agg is only needed with aggregate functions in @mutate")
# else
# window_clause = construct_window_clause(sq)
# return "$(str) $(window_clause)"
# end
#stringr functions, have to use function that removes _ so capture can capture name
elseif @capture(x, strreplaceall(str_, pattern_, replace_))
return :(REPLACE($str, $pattern, $replace))
elseif @capture(x, strreplace(str_, pattern_, replace_))
return error("str_replace is not yet supported for oracle support. Only str_replace_all")
elseif @capture(x, strremoveall(str_, pattern_))
return :(REPLACE($str, $pattern, ""))
elseif @capture(x, strremove(str_, pattern_))
return error("str_remove is not yet supported for oracle support. Only str_remove_all ")
elseif @capture(x, ismissing(a_))
return "($(string(a)) IS NULL)"
# Date extraction functions
elseif @capture(x, year(a_))
return "DATE(YEAR FROM " * string(a) * ")"
elseif @capture(x, month(a_))
return "DATE(MONTH FROM " * string(a) * ")"
elseif @capture(x, day(a_))
return "DATE(DAY FROM " * string(a) * ")"
elseif @capture(x, hour(a_))
return "DATE(HOUR FROM " * string(a) * ")"
elseif @capture(x, minute(a_))
return "DATE(MINUTE FROM " * string(a) * ")"
elseif @capture(x, second(a_))
return "DATE(SECOND FROM " * string(a) * ")"
elseif @capture(x, floordate(time_column_, unit_))
return floordate_to_oracle(unit, time_column)
elseif @capture(x, difftime(endtime_, starttime_, unit_))
return :(DATE_DIFF($unit, $starttime, $endtime))
elseif @capture(x, replacemissing(column_, replacement_value_))
return :(COALESCE($column, $replacement_value))
elseif @capture(x, missingif(column_, value_to_replace_))
return :(NULLIF($column, $value_to_replace))
elseif isa(x, Expr) && x.head == :call
if x.args[1] == :if_else && length(x.args) == 4
return parse_if_else(x)
elseif x.args[1] == :as_float && length(x.args) == 2
column = x.args[2]
return "CAST(" * string(column) * " AS DECIMAL)"
elseif x.args[1] == :as_integer && length(x.args) == 2
column = x.args[2]
return "CAST(" * string(column) * " AS INTEGER)"
elseif x.args[1] == :as_string && length(x.args) == 2
column = x.args[2]
return "CAST(" * string(column) * " AS STRING)"
elseif x.args[1] == :case_when
return parse_case_when(x)
elseif isa(x, Expr) && x.head == :call && x.args[1] == :! && x.args[1] != :!= && length(x.args) == 2
inner_expr = expr_to_sql_oracle(x.args[2], sq, from_summarize = false) # Recursively transform the inner expression
return string("NOT (", inner_expr, ")")
elseif x.args[1] == :str_detect && length(x.args) == 3
column, pattern = x.args[2], x.args[3]
return string(column, " LIKE \'%", pattern, "%'")
elseif isa(x, Expr) && x.head == :call && x.args[1] == :n && length(x.args) == 1
return "COUNT(*)"
end
end
return x
end
end


function floordate_to_oracle(unit::String, time_column::Symbol)
sql_command = ""

if unit == "second"
# Flooring to the nearest second requires a different anchor, like '2000-01-01'
sql_command = "DATEADD(SECOND, DATEDIFF(SECOND, '2000-01-01', $(string(time_column))), '2000-01-01')"
elseif unit == "minute"
sql_command = "DATEADD(MINUTE, DATEDIFF(MINUTE, 0, $(string(time_column))), 0)"
elseif unit == "hour"
sql_command = "DATEADD(HOUR, DATEDIFF(HOUR, 0, $(string(time_column))), 0)"
elseif unit == "day"
sql_command = "DATEADD(DAY, DATEDIFF(DAY, 0, $(string(time_column))), 0)"
elseif unit == "month"
sql_command = "DATEADD(MONTH, DATEDIFF(MONTH, 0, $(string(time_column))), 0)"
elseif unit == "year"
sql_command = "DATEADD(YEAR, DATEDIFF(YEAR, 0, $(string(time_column))), 0)"
else
throw(ArgumentError("Unsupported unit: $unit"))
end

return sql_command
end

0 comments on commit 7b03379

Please sign in to comment.