Skip to content

Commit

Permalink
refactor: project
Browse files Browse the repository at this point in the history
  • Loading branch information
artem-burashnikov committed Nov 7, 2023
1 parent 3b16162 commit 0fccbd1
Show file tree
Hide file tree
Showing 5 changed files with 78 additions and 54 deletions.
24 changes: 7 additions & 17 deletions depinspect/helper.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,8 @@
import tempfile
from enum import Enum
from pathlib import Path
from re import fullmatch


class Archs(Enum):
I386 = "i386"
ADM64 = "amd64"
RISCV64 = "riscv64"
ANY = "any"
ALL = "all"


def get_project_root() -> Path:
"""
Returns the Path object representing the root directory of the project.
Expand All @@ -22,13 +13,9 @@ def get_project_root() -> Path:
Returns:
Path: A Path object representing the project root directory.
"""
return Path(
__file__
).parent.parent # if helper.py is moved this breaks. Don't move!


def get_sources_path(project_root: Path) -> Path:
return project_root / Path("sources.cfg")
return (
Path(__file__).absolute().resolve().parent.parent
) # if helper.py is moved this breaks. Don't move!


def create_temp_dir(dir_prefix: str, output_path: Path) -> Path:
Expand Down Expand Up @@ -64,6 +51,9 @@ def is_valid_package_name(package_name: str) -> bool:


def is_valid_architecture_name(architecture_name: str) -> bool:
# Importing here avoids circular dependency.
from depinspect.definitions import ARCHITECTURES

"""
Checks if a given string is a valid architecture name by comparing it to a predefined list of architectures.
Expand All @@ -77,4 +67,4 @@ def is_valid_architecture_name(architecture_name: str) -> bool:
- The function checks whether the input architecture_name is a member of ARCHITECTURES.
- ARCHITECTURES is imported from depinspect.definitions and should contain the valid architecture names.
"""
return any(architecture_name == arch.value for arch in Archs)
return architecture_name in ARCHITECTURES
35 changes: 31 additions & 4 deletions depinspect/load/sqlite_db.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,48 @@
import logging
import sqlite3
from pathlib import Path
from sys import exit

from depinspect import files


def db_remove(db_path: Path) -> bool:
def db_remove(db_path: Path) -> None:
"""
Removes an SQLite3 database file specified by the given path.
Args:
- db_path (Path): The path to the SQLite3 database file.
Returns:
None. If the file does not have a ".db" extension, an error is logged and the process exits with status 1.
Notes:
- The function checks if the file extension is ".db" before attempting to remove it.
"""
file_extension = db_path.suffix
if file_extension == ".db":
logging.info("Removing database.")
files.remove_file(db_path)
return True
logging.warning(f"File specified at {db_path} is not an sqlite3 database.")
return False
else:
logging.error(f"File specified at {db_path} is not an sqlite3 database.")
exit(1)


def db_new(db_name: str, output_path: Path) -> Path:
"""
Creates a new SQLite3 database with the specified name and path, initializing pre-defined tables.
Args:
- db_name (str): The name of the new SQLite3 database.
- output_path (Path): The directory where the new database will be created.
Returns:
Path: A Path object representing the path to the newly created database.
Notes:
- If a database with the same name exists, it is removed before creating a new one.
- Two tables, 'Packages' and 'Dependencies', are created with necessary columns.
"""
db_path = output_path / Path(db_name)

if db_path.is_file() and db_path.suffix == ".db":
Expand Down
24 changes: 14 additions & 10 deletions depinspect/load/ubuntu/metadata.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import sys
from pathlib import Path
from sqlite3 import connect
from sys import exit
from typing import List

from depinspect.files import list_files_in_directory
Expand Down Expand Up @@ -38,11 +38,11 @@ def parse_string_to_list(
def process_metadata_into_db(file_path: Path, db_path: Path) -> None:
if file_path.suffix != ".txt":
logging.exception(f"{file_path.name} is not a valid metadata file.")
sys.exit(1)
exit(1)

if db_path.suffix != ".db":
logging.exception(f"{db_path.name} is not a valid sqlite3 database.")
sys.exit(1)
exit(1)

db_connection = connect(db_path)

Expand All @@ -56,17 +56,12 @@ def process_metadata_into_db(file_path: Path, db_path: Path) -> None:

for line in file:
if line.startswith("Package:"):
# Extract the 'Package' information
package_name = line[len("Package:") :].strip()

elif line.startswith("Version:"):
# Extract the 'Version' information
version = line[len("Version:") :].strip()

elif line.startswith("Architecture:"):
# Extract the 'Architecture' information.
# Several architecture strings provided by a '$ dpkg-architecture -L' command
# COULD be listed. Usually any, all or specific.
parse_string_to_list(
string=line,
prefix_to_exclude="Architecture:",
Expand All @@ -75,7 +70,6 @@ def process_metadata_into_db(file_path: Path, db_path: Path) -> None:
)

elif line.startswith("Depends:"):
# Extract the 'Depends' information as a list
parse_string_to_list(
string=line,
prefix_to_exclude="Depends:",
Expand All @@ -84,7 +78,6 @@ def process_metadata_into_db(file_path: Path, db_path: Path) -> None:
)

elif line.startswith("\n"):
# Process previously read metadata when a blank line is encountered
if package_name and version and architecture:
result = db_connection.execute(
"INSERT INTO Packages (package_name, version, distribution, architecture) VALUES (?, ?, ?, ?)",
Expand All @@ -111,6 +104,17 @@ def process_metadata_into_db(file_path: Path, db_path: Path) -> None:


def run_ubuntu_metadata_processing(tmp_dir: Path, db_path: Path) -> None:
"""
Processes Ubuntu metadata files in a temporary directory and populates the specified SQLite3 database.
Args:
- tmp_dir (Path): The temporary directory containing Ubuntu metadata files.
- db_path (Path): The path to the SQLite3 database to be populated.
Notes:
- Filters txt files in the temporary directory whose names start with "ubuntu".
- Processes each metadata file and populates the SQLite3 database.
"""
txt_files = [
txt_file
for txt_file in list_files_in_directory(tmp_dir)
Expand Down
43 changes: 24 additions & 19 deletions depinspect/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@

import click

from depinspect.definitions import DB_NAME, ROOT_DIR, SOURCES_FILE_PATH
from depinspect.helper import (
create_temp_dir,
get_project_root,
get_sources_path,
is_valid_architecture_name,
is_valid_package_name,
)
Expand Down Expand Up @@ -55,43 +54,49 @@ def main(
package2: Tuple[str, str],
update: bool,
) -> None:
def init(config_file: Path, project_root: Path) -> None:
tmp_dir = create_temp_dir(dir_prefix=".tmp", output_path=project_root)
fetch_and_save_metadata(config_file, tmp_dir)
process_archives(tmp_dir)

db_path = sqlite_db.db_new(db_name="dependencies.db", output_path=project_root)
def init(config_path: Path, db_name: str, output_path: Path) -> None:
tmp_dir = create_temp_dir(dir_prefix=".tmp", output_path=output_path)
db_path = sqlite_db.db_new(db_name=db_name, output_path=output_path)

try:
logging.info("Fetching archives from pre-defined URL sources.")
fetch_and_save_metadata(config_path, tmp_dir)
logging.info("Fetching: Success.")

logging.info("Extracting archives.")
process_archives(tmp_dir)
logging.info("Extracting: Sucess.")

logging.info("Processing ubuntu metadata into database.")
run_ubuntu_metadata_processing(tmp_dir, db_path)
logging.info("Ubuntu processing: Success.")

except Exception:
logging.exception(
"There was an exception trying to process ubuntu metadata."
"There was an exception trying to pull data into database."
)
logging.error("Removing database, if exists, as it might be corrupted.")
if db_path.is_file() and db_path.suffix == ".db":
logging.info("Removing database as it may be corrupted.")
sqlite_db.db_remove(db_path)

finally:
logging.info("Cleaning up.")
rmtree(tmp_dir)
logging.info("Done.")

project_root = get_project_root()
metadata_sources_file = get_sources_path(project_root)

# Update flag has been passed.
if update:
init(metadata_sources_file, project_root)
logging.info("Re-initialization is complete.")
init(config_path=SOURCES_FILE_PATH, db_name=DB_NAME, output_path=ROOT_DIR)
logging.info("Update complete.")
ctx.exit(0)

if not Path.joinpath(project_root, "dependencies.db").is_file():
init(metadata_sources_file, project_root)
if not Path.joinpath(ROOT_DIR, DB_NAME).is_file():
init(config_path=SOURCES_FILE_PATH, db_name=DB_NAME, output_path=ROOT_DIR)
else:
logging.info("Using existing database")

# At this point database exists in the project root either from before or (re)-initialized.
db_path = project_root / Path("dependencies.db")
# At this point database MUST exist in the project root either from earlier usage or (re)-initialized.
db_path = ROOT_DIR / DB_NAME

if not package1 or not package2:
print(
Expand Down
6 changes: 2 additions & 4 deletions tests/load/test_fetch.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
from urllib import error, request

from depinspect.helper import get_project_root, get_sources_path
from depinspect.definitions import SOURCES_FILE_PATH
from depinspect.load.fetch import read_config


# Check that all urls defined in sources.cfg are reachable
def test_URL_sources() -> None:
project_root = get_project_root()
sources = get_sources_path(project_root)
metadata_sources = read_config(sources)
metadata_sources = read_config(SOURCES_FILE_PATH)

for section in metadata_sources.sections():
for key in metadata_sources[section]:
Expand Down

0 comments on commit 0fccbd1

Please sign in to comment.