
Commit

Revamp logging (#478)
Summary:
This is a set of changes to make our training logs more meaningful and
easier to understand. We print the task config at the beginning of training,
make it clear which values are approximate and which are final, suppress
verbose logs by default, and format floats consistently.

Before this diff: P128995674
After this diff: P128995244
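As a rough illustration of the new format (all values hypothetical), a per-step progress line now looks like:

Approximate meter values: train phase 0 (25.00% done), loss: 0.6931, meters: [accuracy_meter(top_1=0.512345,top_5=0.876543)]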
Pull Request resolved: #478

Differential Revision: D21022171

Pulled By: vreis

fbshipit-source-id: e1770477071095e54e3edd7387501d0728480060
vreis authored and facebook-github-bot committed Apr 14, 2020
1 parent d16fc33 commit aed1730
Showing 8 changed files with 31 additions and 45 deletions.
37 changes: 11 additions & 26 deletions classy_vision/hooks/loss_lr_meter_logging_hook.py
@@ -18,7 +18,6 @@ class LossLrMeterLoggingHook(ClassyHook):
Logs the loss, optimizer LR, and meters. Logs at the end of a phase.
"""

on_start = ClassyHook._noop
on_phase_start = ClassyHook._noop
on_end = ClassyHook._noop

@@ -35,6 +34,9 @@ def __init__(self, log_freq: Optional[int] = None) -> None:
), "log_freq must be an int or None"
self.log_freq: Optional[int] = log_freq

def on_start(self, task) -> None:
logging.info(f"Starting training. Task: {task}")

def on_phase_end(self, task) -> None:
"""
Log the loss, optimizer LR, and meters for the phase.
@@ -45,10 +47,7 @@ def on_phase_end(self, task) -> None:
# do not explicitly state this since it is possible for a
# trainer to implement an unsynced end of phase meter or
# for meters to not provide a sync function.
logging.info("End of phase metric values:")
self._log_loss_meters(task)
if task.train:
self._log_lr(task)
self._log_loss_meters(task, prefix="Synced meters: ")

def on_step(self, task) -> None:
"""
@@ -58,18 +57,9 @@ def on_step(self, task) -> None:
return
batches = len(task.losses)
if batches and batches % self.log_freq == 0:
self._log_lr(task)
logging.info("Local unsynced metric values:")
self._log_loss_meters(task)

def _log_lr(self, task) -> None:
"""
Compute and log the optimizer LR.
"""
optimizer_lr = task.optimizer.parameters.lr
logging.info("Learning Rate: {}\n".format(optimizer_lr))
self._log_loss_meters(task, prefix="Approximate meter values: ")

def _log_loss_meters(self, task) -> None:
def _log_loss_meters(self, task, prefix="") -> None:
"""
Compute and log the loss and meters.
"""
@@ -80,14 +70,9 @@ def _log_loss_meters(self, task) -> None:

# Loss for the phase
loss = sum(task.losses) / (batches * task.get_batchsize_per_replica())
phase_pct = batches / task.num_batches_per_phase

log_strs = [
"Rank: {}, {} phase: {}, processed batches: {}".format(
get_rank(), phase_type, phase_type_idx, batches
),
"{} loss: {}".format(phase_type, loss),
"Meters:",
]
for meter in task.meters:
log_strs.append("{}".format(meter))
logging.info("\n".join(log_strs))
logging.info(
f"{prefix}{phase_type} phase {phase_type_idx} ({phase_pct*100:.2f}% done), "
f"loss: {loss:.4f}, meters: {task.meters}"
)
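A minimal usage sketch of the revamped hook, assuming `task` is an already-built ClassificationTask and that hooks are registered via set_hooks (the log_freq value is illustrative):

from classy_vision.hooks import LossLrMeterLoggingHook

# Logs approximate meter values every 100 steps and synced values at
# phase end; the task config itself is printed once by on_start.
task.set_hooks([LossLrMeterLoggingHook(log_freq=100)])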
5 changes: 2 additions & 3 deletions classy_vision/hooks/model_complexity_hook.py
@@ -53,10 +53,9 @@ def on_start(self, task) -> None:
)
except NotImplementedError:
logging.warning(
"""Model contains unsupported modules:
Could not compute FLOPs for model forward pass. Exception:""",
exc_info=True,
"Model contains unsupported modules, could not compute FLOPs for model forward pass."
)
logging.debug("Exception:", exc_info=True)
try:
self.num_activations = compute_activations(
task.base_model,
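The same pattern in isolation, as a runnable sketch (try_compute_flops and compute_fn are illustrative stand-ins): the warning stays short at default verbosity, and the traceback is only emitted when debug logging is enabled.

import logging

def try_compute_flops(compute_fn):
    try:
        return compute_fn()
    except NotImplementedError:
        # Keep the default-level message concise...
        logging.warning("Model contains unsupported modules, could not compute FLOPs.")
        # ...and reserve the full traceback for DEBUG verbosity.
        logging.debug("Exception:", exc_info=True)
        return None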
3 changes: 0 additions & 3 deletions classy_vision/meters/accuracy_meter.py
@@ -126,9 +126,6 @@ def set_classy_state(self, state):
self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
self._curr_sample_count = state["curr_sample_count"].clone()

def __repr__(self):
return repr({"name": self.name, "value": self.value})

def update(self, model_output, target, **kwargs):
"""
args:
4 changes: 4 additions & 0 deletions classy_vision/meters/classy_meter.py
@@ -114,3 +114,7 @@ def set_classy_state(self, state: Dict[str, Any]) -> None:
This is used to load the state of the meter from a checkpoint.
"""
raise NotImplementedError

def __repr__(self):
values = ",".join([f"{key}={value:.6f}" for key, value in self.value.items()])
return f"{self.name}_meter({values})"
3 changes: 0 additions & 3 deletions classy_vision/meters/precision_meter.py
@@ -127,9 +127,6 @@ def set_classy_state(self, state):
self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
self._curr_sample_count = state["curr_sample_count"].clone()

def __repr__(self):
return repr({"name": self.name, "value": self.value})

def update(self, model_output, target, **kwargs):
"""
args:
3 changes: 0 additions & 3 deletions classy_vision/meters/recall_meter.py
@@ -126,9 +126,6 @@ def set_classy_state(self, state):
self._curr_correct_predictions_k = state["curr_correct_predictions_k"].clone()
self._curr_correct_targets = state["curr_correct_targets"].clone()

def __repr__(self):
return repr({"name": self.name, "value": self.value})

def update(self, model_output, target, **kwargs):
"""
args:
3 changes: 0 additions & 3 deletions classy_vision/meters/video_meter.py
@@ -76,9 +76,6 @@ def set_classy_state(self, state):
self.reset()
self.meter.set_classy_state(state["meter_state"])

def __repr__(self):
return repr({"name": self.name, "value": self.value})

def update(self, model_output, target, is_train, **kwargs):
"""Updates any internal state of meter with new model output and target.
18 changes: 14 additions & 4 deletions classy_vision/tasks/classification_task.py
@@ -6,6 +6,7 @@

import copy
import enum
import json
import logging
import math
import time
@@ -413,6 +414,10 @@ def from_config(cls, config: Dict[str, Any]) -> "ClassificationTask":
for phase_type in phase_types:
task.set_dataset(datasets[phase_type], phase_type)

# NOTE: this is a private member and only meant to be used for
# logging/debugging purposes. See __repr__ implementation
task._config = config

return task

@property
@@ -854,7 +859,7 @@ def advance_phase(self):
resets counters, shuffles dataset, rebuilds iterators, and
sets the train / test state for phase.
"""
logging.info("Advancing phase")
logging.debug("Advancing phase")
# Reset meters for next phase / epoch
for meter in self.meters:
meter.reset()
@@ -893,7 +898,7 @@ def _recreate_data_loader_from_dataset(self, phase_type=None):
if phase_type is None:
phase_type = self.phase_type

logging.info("Recreating data loader for new phase")
logging.debug("Recreating data loader for new phase")
num_workers = 0
if hasattr(self.dataloaders[phase_type], "num_workers"):
num_workers = self.dataloaders[phase_type].num_workers
@@ -979,10 +984,10 @@ def on_phase_start(self):
def on_phase_end(self):
self.log_phase_end("train")

logging.info("Syncing meters on phase end...")
logging.debug("Syncing meters on phase end...")
for meter in self.meters:
meter.sync_state()
logging.info("...meters synced")
logging.debug("...meters synced")
barrier()

for hook in self.hooks:
@@ -1016,3 +1021,8 @@ def log_phase_end(self, tag):
"im_per_sec": im_per_sec,
}
)

def __repr__(self):
if hasattr(self, "_config"):
return json.dumps(self._config, indent=4)
return super().__repr__()
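A self-contained sketch of the pattern (class name and config are illustrative, not the real ClassificationTask): the raw config is stashed on the task in from_config and pretty-printed when the on_start hook logs the task. Since advance_phase and meter syncing now log at DEBUG, pass level=logging.DEBUG to basicConfig to see those messages again.

import json
import logging

logging.basicConfig(level=logging.INFO)  # use logging.DEBUG to see the demoted messages

class DemoTask:
    @classmethod
    def from_config(cls, config):
        task = cls()
        task._config = config  # private; kept only for logging/debugging
        return task

    def __repr__(self):
        if hasattr(self, "_config"):
            return json.dumps(self._config, indent=4)
        return super().__repr__()

logging.info(f"Starting training. Task: {DemoTask.from_config({'num_epochs': 90})}")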
