Skip to content

Commit

Permalink
Add log files for each mongo worker
Browse files Browse the repository at this point in the history
  • Loading branch information
Cmurilochem committed Mar 29, 2024
1 parent 8d84a3d commit 9be8b73
Showing 1 changed file with 12 additions and 4 deletions.
16 changes: 12 additions & 4 deletions n3fit/src/n3fit/hyper_optimization/mongofiletrials.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ def __init__(
f"mongo://{self.db_host}:{self.db_port}/{self._process_db_name(self.db_name)}/jobs"
)
self.workers = []
self.log_files = []

self._store_trial = False
self._json_file = replica_path / "tries.json"
Expand Down Expand Up @@ -195,7 +196,7 @@ def refresh(self):

# write json to disk
if self._store_trial:
log.info("Storing scan in %s", self._json_file)
# log.info("Storing scan in %s", self._json_file)
local_trials = []
for idx, t in enumerate(self._dynamic_trials):
local_trials.append(t)
Expand Down Expand Up @@ -264,21 +265,28 @@ def start_mongo_workers(
# avoid memory fragmentation issues?
# my_env["TF_GPU_ALLOCATOR"] = "cuda_malloc_async"

# create log files to redirect the mongo-workers output
mongo_workers_logfile = f"mongo-worker_{i+1}.log"
log_file = open(mongo_workers_logfile, 'w')
self.log_files.append(log_file)
# run mongo workers
# we could use stdout=subprocess.DEVNULL and stderr=subprocess.DEVNULL in Popen to suppress output info
worker = subprocess.Popen(args, env=my_env)
worker = subprocess.Popen(
args, env=my_env, stdout=log_file, stderr=subprocess.STDOUT
)
self.workers.append(worker)
log.info(f"Started mongo worker {i+1}/{self.num_workers}")
except OSError as err:
log_file.close()
msg = f"Failed to execute {args}. Make sure you have MongoDB installed."
raise EnvironmentError(msg) from err

def stop_mongo_workers(self):
"""Terminates all active mongo workers."""
for worker in self.workers:
for worker, log_file in zip(self.workers, self.log_files):
try:
worker.terminate()
worker.wait()
log_file.close()
log.info(f"Stopped mongo worker {self.workers.index(worker)+1}/{self.num_workers}")
except Exception as err:
log.error(
Expand Down

0 comments on commit 9be8b73

Please sign in to comment.