Skip to content

Commit

Permalink
Add option to fix or freely choose random seeds (#191)
Browse files Browse the repository at this point in the history
* Record seeds for classes and allow fixing all the random number generators

* Save SQLite files relative to the examples folder

* Reset cache before reconnection

* Show which connection is closed
  • Loading branch information
1kastner committed Sep 9, 2023
1 parent a7d44f6 commit acb58ba
Show file tree
Hide file tree
Showing 37 changed files with 289 additions and 75 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -62,3 +62,4 @@ examples/Python_Script/databases/

# Ignore local changes as they happen with every execution. If something changes, the commit must be forced.
docs/notebooks/data/prepared_dbs/demo_poc.sqlite
conflowgen/data/tools/
5 changes: 3 additions & 2 deletions conflowgen/api/database_chooser.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ def load_existing_sqlite_database(self, file_name: str) -> None:
"""
if self.peewee_sqlite_db is not None:
self._close_and_reset_db()
self.peewee_sqlite_db = self.sqlite_database_connection.choose_database(file_name, create=False, reset=False)
DataSummariesCache.reset_cache()
self.peewee_sqlite_db = self.sqlite_database_connection.choose_database(file_name, create=False, reset=False)

def create_new_sqlite_database(
self,
Expand Down Expand Up @@ -91,7 +91,8 @@ def close_current_connection(self) -> None:
raise NoCurrentConnectionException("You must first create a connection to an SQLite database.")

def _close_and_reset_db(self):
self.logger.debug("Closing current database connection.")
path_to_sqlite_database = self.sqlite_database_connection.path_to_sqlite_database
self.logger.debug(f"Closing current database connection {path_to_sqlite_database}.")
self.peewee_sqlite_db.close()
self.peewee_sqlite_db = None
DataSummariesCache.reset_cache()
22 changes: 22 additions & 0 deletions conflowgen/application/models/random_seed_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from peewee import AutoField, CharField, IntegerField, BooleanField

from conflowgen.domain_models.base_model import BaseModel


class RandomSeedStore(BaseModel):
    """
    Database table that keeps one random seed per named source of randomness,
    e.g., a class or a function.
    """

    # Surrogate primary key.
    id = AutoField()

    # Identifier of the object the seed belongs to.
    name = CharField(help_text="The name of the class, function, or other type of object.")

    # Flag that states whether the stored seed is meant to change between invocations.
    is_random = BooleanField(
        help_text="Whether the value is meant to change between invocations of the generation process."
    )

    # The seed that has been handed out most recently.
    random_seed = IntegerField(help_text="The last used random seed.")
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import logging
import random
import typing
import time

from conflowgen.application.models.random_seed_store import RandomSeedStore


class RandomSeedStoreRepository:
    """
    Reads and writes the random seeds that are kept in the ``RandomSeedStore`` table.

    Each entry is identified by a name. An entry is either flagged as random (a fresh seed replaces the stored one
    on every request) or as fixed (the stored seed is re-used on every request).
    """

    def __init__(self):
        self.logger = logging.getLogger("conflowgen")

    def get_random_seed(self, seed_name: str, log_loading_process: bool = False) -> int:
        """
        Look up the seed for ``seed_name`` and create or refresh the stored entry if required.

        Args:
            seed_name: The name under which the seed is stored.
            log_loading_process: Whether to emit a debug log message describing which seed is used.

        Returns:
            The seed to use. If no entry exists yet, a new seed is drawn and stored as a random entry. If the entry
            is flagged as random, the stored seed is replaced by a new one. Otherwise, the stored seed is returned.
        """
        random_seed_store = RandomSeedStore.get_or_none(
            RandomSeedStore.name == seed_name
        )

        if random_seed_store is None:
            # there is no previous seed available, enter the current seed and return its value
            random_seed = self._get_random_seed()
            RandomSeedStore.create(
                name=seed_name,
                random_seed=random_seed,
                is_random=True
            )
            if log_loading_process:
                self.logger.debug(f"Randomly set seed {random_seed} for '{seed_name}'")
            return random_seed

        if random_seed_store.is_random:
            # there is a previous seed but we are told to overwrite it
            previous_seed = random_seed_store.random_seed
            random_seed = self._get_random_seed()
            random_seed_store.random_seed = random_seed
            random_seed_store.save()
            if log_loading_process:
                self.logger.debug(f"Overwrite seed {previous_seed} with {random_seed} for '{seed_name}'")
            return random_seed

        # there is a previous seed and we should re-use it
        random_seed = random_seed_store.random_seed
        if log_loading_process:
            self.logger.debug(f"Re-use seed {random_seed} for '{seed_name}'")
        return random_seed

    @staticmethod
    def _get_random_seed() -> int:
        """Derive a new seed from the current time, truncated to whole seconds."""
        return int(time.time())

    def fix_random_seed(
            self, seed_name: str, random_seed: typing.Optional[int], log_loading_process: bool = False
    ) -> None:
        """
        Store a fixed seed for ``seed_name`` so that subsequent calls to ``get_random_seed`` re-use it.

        Args:
            seed_name: The name under which the seed is stored.
            random_seed: The seed to store. If ``None``, a seed is derived from the current time and then fixed.
            log_loading_process: Whether to emit a debug log message describing which seed is set.
        """
        if random_seed is None:
            random_seed = self._get_random_seed()

        random_seed_store = RandomSeedStore.get_or_none(
            RandomSeedStore.name == seed_name
        )
        if random_seed_store is None:
            # create() already persists the new row, no additional save() is required
            RandomSeedStore.create(
                name=seed_name,
                is_random=False,
                random_seed=random_seed
            )
        else:
            random_seed_store.random_seed = random_seed
            # An entry that was created by get_random_seed is flagged as random. Without resetting the flag, the
            # next call to get_random_seed would replace the seed that has just been fixed.
            random_seed_store.is_random = False
            random_seed_store.save()

        if log_loading_process:
            self.logger.debug(f"Set seed {random_seed} for '{seed_name}'")


# Shared repository instance, so that not every request for a random object creates a new repository.
_random_seed_store_repository = RandomSeedStoreRepository()


def get_initialised_random_object(seed_name: str, log_loading_process: bool = True) -> random.Random:
    """
    Create a random number generator that is seeded with the seed stored for ``seed_name``.

    Args:
        seed_name: The name under which the seed is looked up, e.g., the name of the calling class.
        log_loading_process: Whether the repository emits a debug log message describing which seed is used.

    Returns:
        A ``random.Random`` instance initialised with the seed obtained from the repository.
    """
    random_seed = _random_seed_store_repository.get_random_seed(seed_name, log_loading_process=log_loading_process)
    return random.Random(x=random_seed)
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@

class AverageContainerDwellTimeCalculatorService:

def get_average_container_dwell_time(self, start_date: datetime.date, end_date: datetime.date) -> float:
@staticmethod
def get_average_container_dwell_time(start_date: datetime.date, end_date: datetime.date) -> float:
inbound_vehicle_capacity = InboundAndOutboundVehicleCapacityCalculatorService.get_inbound_capacity_of_vehicles(
start_date=start_date,
end_date=end_date
Expand Down
4 changes: 3 additions & 1 deletion conflowgen/database_connection/create_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import peewee

from conflowgen.application.models.container_flow_generation_properties import ContainerFlowGenerationProperties
from conflowgen.application.models.random_seed_store import RandomSeedStore
from conflowgen.domain_models.arrival_information import TruckArrivalInformationForPickup, \
TruckArrivalInformationForDelivery
from conflowgen.domain_models.container import Container
Expand Down Expand Up @@ -40,7 +41,8 @@ def create_tables(sql_db_connection: peewee.Database) -> peewee.Database:
TruckArrivalInformationForPickup,
TruckArrivalInformationForDelivery,
StorageRequirementDistribution,
ContainerDwellTimeDistribution
ContainerDwellTimeDistribution,
RandomSeedStore,
])
for table_with_index in (
Destination,
Expand Down
16 changes: 8 additions & 8 deletions conflowgen/database_connection/sqlite_database_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(self, sqlite_databases_directory: Optional[str] = None):
sqlite_databases_directory = self.SQLITE_DEFAULT_DIR
sqlite_databases_directory = os.path.abspath(sqlite_databases_directory)
self.sqlite_databases_directory = sqlite_databases_directory
self.path_to_sqlite_database = ""

self.logger = logging.getLogger("conflowgen")

Expand All @@ -82,16 +83,15 @@ def choose_database(
**seeder_options
) -> SqliteDatabase:
if database_name == ":memory:":
path_to_sqlite_database = ":memory:"
self.path_to_sqlite_database = ":memory:"
sqlite_database_existed_before = False
else:
path_to_sqlite_database, sqlite_database_existed_before = self._load_or_create_sqlite_file_on_hard_drive(
database_name=database_name, create=create, reset=reset
)
self.path_to_sqlite_database, sqlite_database_existed_before = (
self._load_or_create_sqlite_file_on_hard_drive(database_name=database_name, create=create, reset=reset))

self.logger.debug(f"Opening file {path_to_sqlite_database}")
self.logger.debug(f"Opening file {self.path_to_sqlite_database}")
self.sqlite_db_connection = SqliteDatabase(
path_to_sqlite_database,
self.path_to_sqlite_database,
pragmas=self.SQLITE_DEFAULT_SETTINGS
)
database_proxy.initialize(self.sqlite_db_connection)
Expand All @@ -103,12 +103,12 @@ def choose_database(
self.logger.debug(f'foreign_keys: {self.sqlite_db_connection.foreign_keys}')

if not sqlite_database_existed_before or reset:
self.logger.debug(f"Creating new database at {path_to_sqlite_database}")
self.logger.debug(f"Creating new database at {self.path_to_sqlite_database}")
create_tables(self.sqlite_db_connection)
self.logger.debug("Seed with default values...")
seed_all_distributions(**seeder_options)
else:
self.logger.debug(f"Open existing database at {path_to_sqlite_database}")
self.logger.debug(f"Open existing database at {self.path_to_sqlite_database}")

container_flow_properties: ContainerFlowGenerationProperties | None = \
ContainerFlowGenerationProperties.get_or_none()
Expand Down
8 changes: 3 additions & 5 deletions conflowgen/domain_models/factories/container_factory.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from __future__ import annotations

import math
import random
from typing import Dict, MutableSequence, Sequence, Type

from conflowgen.domain_models.container import Container
Expand All @@ -19,19 +18,18 @@
from conflowgen.domain_models.repositories.large_scheduled_vehicle_repository import LargeScheduledVehicleRepository
from conflowgen.domain_models.vehicle import AbstractLargeScheduledVehicle, LargeScheduledVehicle
from conflowgen.tools.distribution_approximator import DistributionApproximator
from conflowgen.application.repositories.random_seed_store_repository import get_initialised_random_object


class ContainerFactory:
"""
Creates containers according to the distributions which are either hard-coded or stored in the database.
"""

ignored_capacity = ContainerLength.get_teu_factor(ContainerLength.other)

random_seed = 1
ignored_capacity = ContainerLength.get_maximum_teu_factor()

def __init__(self):
self.seeded_random = random.Random(x=self.random_seed)
self.seeded_random = get_initialised_random_object(self.__class__.__name__)
self.mode_of_transportation_distribution: dict[ModeOfTransport, dict[ModeOfTransport, float]] | None = None
self.container_length_distribution: dict[ContainerLength, float] | None = None
self.container_weight_distribution: dict[ContainerLength, dict[int, float]] | None = None
Expand Down
28 changes: 16 additions & 12 deletions conflowgen/flow_generator/abstract_truck_for_containers_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
import abc
import logging
import math
import random
from typing import List, Tuple, Union, Optional, Dict, Sequence
import typing

from conflowgen.tools.weekly_distribution import WeeklyDistribution
from ..application.repositories.random_seed_store_repository import get_initialised_random_object
from ..domain_models.data_types.storage_requirement import StorageRequirement
from ..domain_models.container import Container
from ..domain_models.distribution_repositories.container_dwell_time_distribution_repository import \
Expand All @@ -29,23 +29,26 @@ class AbstractTruckForContainersManager(abc.ABC):
def __init__(self):
self.logger = logging.getLogger("conflowgen")

self.seeded_random = get_initialised_random_object(self.__class__.__name__)

self.container_dwell_time_distribution_repository = ContainerDwellTimeDistributionRepository()
self.container_dwell_time_distributions: \
Dict[ModeOfTransport, Dict[ModeOfTransport, Dict[StorageRequirement, ContinuousDistribution]]] | None \
typing.Dict[ModeOfTransport, typing.Dict[
ModeOfTransport, typing.Dict[StorageRequirement, ContinuousDistribution]]] | None \
= None

self.truck_arrival_distribution_repository = TruckArrivalDistributionRepository()

self.truck_arrival_distributions: \
Dict[ModeOfTransport, Dict[StorageRequirement, WeeklyDistribution | None]] = {
typing.Dict[ModeOfTransport, typing.Dict[StorageRequirement, WeeklyDistribution | None]] = {
vehicle: {
storage_requirement: None
for storage_requirement in StorageRequirement
} for vehicle in ModeOfTransport
}

self.vehicle_factory = VehicleFactory()
self.time_window_length_in_hours: Optional[int] = None
self.time_window_length_in_hours: typing.Optional[int] = None

@abc.abstractmethod
def _get_container_dwell_time_distribution(
Expand All @@ -64,7 +67,7 @@ def reload_distributions(
self
) -> None:
# noinspection PyTypeChecker
hour_of_the_week_fraction_pairs: List[Union[Tuple[int, float], Tuple[int, int]]] = \
hour_of_the_week_fraction_pairs: typing.List[typing.Union[typing.Tuple[int, float], typing.Tuple[int, int]]] = \
list(self.truck_arrival_distribution_repository.get_distribution().items())
self.time_window_length_in_hours = hour_of_the_week_fraction_pairs[1][0] - hour_of_the_week_fraction_pairs[0][0]

Expand All @@ -73,7 +76,7 @@ def reload_distributions(

def _update_truck_arrival_and_container_dwell_time_distributions(
self,
hour_of_the_week_fraction_pairs: List[Union[Tuple[int, float], Tuple[int, int]]]
hour_of_the_week_fraction_pairs: typing.List[typing.Union[typing.Tuple[int, float], typing.Tuple[int, int]]]
) -> None:
for vehicle_type in ModeOfTransport:
for storage_requirement in StorageRequirement:
Expand Down Expand Up @@ -111,14 +114,15 @@ def _get_distributions(
return container_dwell_time_distribution, truck_arrival_distribution

@abc.abstractmethod
def _get_truck_arrival_distributions(self, container: Container) -> Dict[StorageRequirement, WeeklyDistribution]:
def _get_truck_arrival_distributions(self, container: Container) -> typing.Dict[
StorageRequirement, WeeklyDistribution]:
pass

def _get_time_window_of_truck_arrival(
self,
container_dwell_time_distribution: ContinuousDistribution,
truck_arrival_distribution_slice: Dict[int, float],
_debug_check_distribution_property: Optional[str] = None
truck_arrival_distribution_slice: typing.Dict[int, float],
_debug_check_distribution_property: typing.Optional[str] = None
) -> int:
"""
Returns:
Expand Down Expand Up @@ -152,7 +156,7 @@ def _get_time_window_of_truck_arrival(
else:
raise UnknownDistributionPropertyException(_debug_check_distribution_property)
else:
selected_time_window = random.choices(
selected_time_window = self.seeded_random.choices(
population=time_windows_for_truck_arrival,
weights=total_probabilities
)[0]
Expand All @@ -178,7 +182,7 @@ def _get_time_window_of_truck_arrival(
return selected_time_window

@staticmethod
def _drop_where_zero(sequence: Sequence, filter_sequence: Sequence) -> list:
def _drop_where_zero(sequence: typing.Sequence, filter_sequence: typing.Sequence) -> list:
new_sequence = []
for element, filter_element in zip(sequence, filter_sequence):
if filter_element:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from __future__ import annotations
import logging
import random
from typing import Dict, Type, List

from conflowgen.application.repositories.random_seed_store_repository import get_initialised_random_object
from conflowgen.domain_models.container import Container
from conflowgen.domain_models.distribution_repositories.mode_of_transport_distribution_repository import \
ModeOfTransportDistributionRepository
Expand All @@ -18,6 +18,8 @@ class AllocateSpaceForContainersDeliveredByTruckService:
ignored_capacity = ContainerLength.get_teu_factor(ContainerLength.other)

def __init__(self):
self.seeded_random = get_initialised_random_object(self.__class__.__name__)

self.logger = logging.getLogger("conflowgen")
self.mode_of_transport_distribution_repository = ModeOfTransportDistributionRepository()
self.mode_of_transport_distribution: Dict[ModeOfTransport, Dict[ModeOfTransport, float]] | None = None
Expand Down Expand Up @@ -155,7 +157,7 @@ def _pick_vehicle_type(
return None

# pick vehicle type
vehicle_type: ModeOfTransport = random.choices(
vehicle_type: ModeOfTransport = self.seeded_random.choices(
population=vehicle_types,
weights=frequency_of_vehicle_types
)[0]
Expand All @@ -178,7 +180,7 @@ def _pick_vehicle(
"by trucks.")
return None

vehicle: Type[AbstractLargeScheduledVehicle] = random.choices(
vehicle: Type[AbstractLargeScheduledVehicle] = self.seeded_random.choices(
population=list(vehicle_distribution.keys()),
weights=list(vehicle_distribution.values())
)[0]
Expand Down
Loading

0 comments on commit acb58ba

Please sign in to comment.