[Task]: Finish adding Postgres Integration to Analytics Library (#72)

Fixes #45 * update `config.py` database url * add function in `cli.py` * updated packages in `poetry.lock` N/A Row created manually in the database alongside a row created via `test_connection` ![Screen Shot 2024-06-11 at 1 49 53 PM](https://github.com/navapbc/simpler-grants-gov/assets/37313082/b83afad8-5fe1-404f-adf3-c94945740bbe)
navapbc · Sep 18, 2024 · 1055985 · 1055985
1 parent c3503b7
commit 1055985
Show file tree

Hide file tree

Showing 7 changed files with 804 additions and 500 deletions.
diff --git a/analytics/config.py b/analytics/config.py
@@ -8,6 +8,7 @@
 
 For more information visit: https://www.dynaconf.com/
 """
+
 from dynaconf import Dynaconf, Validator, ValidationError
 
 settings = Dynaconf(
@@ -16,6 +17,8 @@
     # looks for config vars in the following files
     # with vars in .secrets.toml overriding vars in settings.toml
     settings_files=["settings.toml", ".secrets.toml"],
+    # merge the settings found in all files
+    merge_enabled= True,
     # add validators for our required config vars
     validators=[
         Validator("SLACK_BOT_TOKEN", must_exist=True),

diff --git a/analytics/poetry.lock b/analytics/poetry.lock
diff --git a/analytics/pyproject.toml b/analytics/pyproject.toml
@@ -20,6 +20,7 @@ pydantic = "^2.0.3"
 python = "^3.11"
 slack-sdk = "^3.23.0"
 typer = { extras = ["all"], version = "^0.9.0" }
+sqlalchemy = "^2.0.30"
 
 [tool.poetry.group.dev.dependencies]
 black = "^23.7.0"

diff --git a/analytics/settings.toml b/analytics/settings.toml
@@ -0,0 +1,4 @@
+# Default Postgres connection settings for the analytics package.
+# Per the notes in integrations/db.py, each value can be overridden with an
+# environment variable carrying the ANALYTICS_ prefix,
+# e.g. `export ANALYTICS_POSTGRES_USER=new_usr`.
+# NOTE(review): no POSTGRES_PASSWORD is defined here although db.get_db() reads
+# settings.postgres_password; presumably it is supplied via .secrets.toml or
+# the environment -- confirm.
+# NOTE(review): POSTGRES_NAME is not referenced by db.get_db() in this change -- confirm
+# whether it is meant to be part of the connection URL.
+POSTGRES_NAME = "app"
+POSTGRES_HOST = "0.0.0.0"
+POSTGRES_USER = "app"
+POSTGRES_PORT = 5432
diff --git a/analytics/src/analytics/cli.py b/analytics/src/analytics/cli.py
@@ -1,19 +1,23 @@
 # pylint: disable=C0415
 """Expose a series of CLI entrypoints for the analytics package."""
+import logging
 from pathlib import Path
 from typing import Annotated, Optional
 
 import typer
 from slack_sdk import WebClient
+from sqlalchemy import text
 
 from analytics.datasets.deliverable_tasks import DeliverableTasks
 from analytics.datasets.sprint_board import SprintBoard
-from analytics.integrations import github, slack
+from analytics.integrations import db, github, slack
 from analytics.metrics.base import BaseMetric, Unit
 from analytics.metrics.burndown import SprintBurndown
 from analytics.metrics.burnup import SprintBurnup
 from analytics.metrics.percent_complete import DeliverablePercentComplete
 
+logger = logging.getLogger(__name__)
+
 # fmt: off
 # Instantiate typer options with help text for the commands below
 SPRINT_FILE_ARG = typer.Option(help="Path to file with exported sprint data")
@@ -122,6 +126,29 @@ def calculate_sprint_burnup(
     )
 
 
+@export_app.command(name="test_connection")
+def test_connection() -> None:
+    """Test function that ensures the DB connection works."""
+    engine = db.get_db()
+    # connection method from sqlalchemy
+    connection = engine.connect()
+
+    # Test INSERT INTO action
+    result = connection.execute(
+        text(
+            "INSERT INTO audit_log (topic,timestamp, end_timestamp, user_id, details)"
+            "VALUES('test','2024-06-11 10:41:15','2024-06-11 10:54:15',87654,'test from command');",
+        ),
+    )
+    # Test SELECT action
+    result = connection.execute(text("SELECT * FROM audit_log WHERE user_id=87654;"))
+    for row in result:
+        print(row)
+    # commits the transaction to the db
+    connection.commit()
+    result.close()
+
+
 @metrics_app.command(name="deliverable_percent_complete")
 def calculate_deliverable_percent_complete(
     sprint_file: Annotated[str, SPRINT_FILE_ARG],

diff --git a/analytics/src/analytics/datasets/base.py b/analytics/src/analytics/datasets/base.py
@@ -1,8 +1,11 @@
+# ruff: noqa: E501
+# pylint: disable=C0301
 """Base class for all datasets which provides an interface for metrics."""
 from pathlib import Path
 from typing import Self
 
 import pandas as pd
+from sqlalchemy import Engine
 
 
 class BaseDataset:
@@ -22,6 +25,82 @@ def from_dict(cls, data: list[dict]) -> Self:
         """Load the dataset from a list of python dictionaries representing records."""
         return cls(df=pd.DataFrame(data))
 
+    def to_sql(
+        self,
+        output_table: str,
+        engine: Engine,
+        *,
+        replace_table: bool = True,
+    ) -> None:
+        """
+        Write the contents of a pandas DataFrame to a SQL table.
+
+        This function takes a pandas DataFrame (`self.df`), an output table name (`output_table`),
+        and a SQLAlchemy Engine object (`engine`) as required arguments. It optionally accepts
+        a `replace_table` argument (default: True) that determines how existing data in the
+        target table is handled.
+
+        **Parameters:**
+
+        * self (required): The instance of the class containing the DataFrame (`self.df`)
+            to be written to the database.
+        * output_table (str, required): The name of the table in the database where the
+            data will be inserted.
+        * engine (sqlalchemy.engine.Engine, required): A SQLAlchemy Engine object representing
+            the connection to the database.
+        * replace_table (bool, default=True):
+            * If True (default), the function will completely replace the contents of the
+            existing table with the data from the DataFrame. (if_exists="replace")
+            * If False, the data from the DataFrame will be appended to the existing table.
+            (if_exists="append")
+
+        **Returns:**
+
+        * None
+
+        **Raises:**
+
+        * Potential exceptions raised by the underlying pandas.to_sql function, such as
+            database connection errors or errors related to data type mismatches.
+        """
+        if replace_table:
+            self.df.to_sql(output_table, engine, if_exists="replace", index=False)
+        else:
+            self.df.to_sql(output_table, engine, if_exists="append", index=False)
+
+    @classmethod
+    def from_sql(
+        cls,
+        source_table: str,
+        engine: Engine,
+    ) -> Self:
+        """
+        Read data from a SQL table into a pandas DataFrame and creates an instance of the current class.
+
+        This function takes a source table name (`source_table`) and a SQLAlchemy Engine object (`engine`) as required arguments.
+        It utilizes pandas.read_sql to retrieve the data from the database and then creates a new instance of the current class (`cls`) initialized with the resulting DataFrame (`df`).
+
+        **Parameters:**
+
+        * cls (class, required): The class that will be instantiated with the data from the
+        SQL table. This allows for creating objects of the same type as the function is called on.
+        * source_table (str, required): The name of the table in the database from which the
+        data will be read.
+        * engine (sqlalchemy.engine.Engine, required): A SQLAlchemy Engine object representing
+        the connection to the database.
+
+        **Returns:**
+
+        * Self: A new instance of the current class (`cls`) initialized with the DataFrame
+        containing the data from the SQL table.
+
+        **Raises:**
+
+        * Potential exceptions raised by the underlying pandas.read_sql function, such as
+        database connection errors or errors related to data type mismatches.
+        """
+        return cls(df=pd.read_sql(source_table, engine))
+
     def to_csv(
         self,
         output_file: Path,

diff --git a/analytics/src/analytics/integrations/db.py b/analytics/src/analytics/integrations/db.py
@@ -0,0 +1,29 @@
+# pylint: disable=invalid-name, line-too-long
+"""Get a connection to the database using a SQLAlchemy engine object."""
+
+from sqlalchemy import Engine, create_engine
+
+from config import settings
+
+
+# The variables used in the connection url are set in settings.toml and
+# .secrets.toml. These can be overridden with the custom prefix defined in config.py: "ANALYTICS".
+# e.g. `export ANALYTICS_POSTGRES_USER=new_usr`.
+# Docs: https://www.dynaconf.com/envvars/
+def get_db() -> Engine:
+    """
+    Get a connection to the database using a SQLAlchemy engine object.
+
+    This function retrieves the database connection URL from the configuration
+    and creates a SQLAlchemy engine object.
+
+    Yields
+    ------
+    sqlalchemy.engine.Engine
+    A SQLAlchemy engine object representing the connection to the database.
+    """
+    return create_engine(
+        f"postgresql+psycopg://{settings.postgres_user}:{settings.postgres_password}@{settings.postgres_host}:{settings.postgres_port}",
+        pool_pre_ping=True,
+        hide_parameters=True,
+    )