ray-project · simonsays1980 · Sep 17, 2024 · Sep 17, 2024 · sven1977 · Sep 18, 2024
@@ -455,7 +455,7 @@ def __init__(
                 object. If unspecified, a default logger is created.
             **kwargs: Arguments passed to the Trainable base class.
         """
-        config = config or self.get_default_config()
+        config = config  # or self.get_default_config()
 
         # Translate possible dict into an AlgorithmConfig object, as well as,
         # resolving generic config objects into specific ones (e.g. passing
@@ -466,22 +466,31 @@ def __init__(
             # `self.get_default_config()` also returned a dict ->
             # Last resort: Create core AlgorithmConfig from merged dicts.
             if isinstance(default_config, dict):
-                config = AlgorithmConfig.from_dict(
-                    config_dict=self.merge_algorithm_configs(
-                        default_config, config, True
+                if "class" in config:
+                    AlgorithmConfig.from_state(config)
+                else:
+                    config = AlgorithmConfig.from_dict(
+                        config_dict=self.merge_algorithm_configs(
+                            default_config, config, True
+                        )
                     )
-                )
+
             # Default config is an AlgorithmConfig -> update its properties
             # from the given config dict.
             else:
-                config = default_config.update_from_dict(config)
+                if isinstance(config, dict) and "class" in config:
+                    config = default_config.from_state(config)
+                else:
+                    config = default_config.update_from_dict(config)
         else:
             default_config = self.get_default_config()
             # Given AlgorithmConfig is not of the same type as the default config:
             # This could be the case e.g. if the user is building an algo from a
             # generic AlgorithmConfig() object.
             if not isinstance(config, type(default_config)):
                 config = default_config.update_from_dict(config.to_dict())
+            else:
+                config = default_config.from_state(config.get_state())
-            default_config = self.get_default_config()
-            # Given AlgorithmConfig is not of the same type as the default config:
-            # This could be the case e.g. if the user is building an algo from a
-            # generic AlgorithmConfig() object.
-            if not isinstance(config, type(default_config)):
-                config = default_config.update_from_dict(config.to_dict())
-            else:
-                config = default_config.from_state(config.get_state())
+            default_config = self.get_default_config()
+            config_state = config.get_state()
+            config = default_config
+            config.set_state(config_state)
-            default_config = self.get_default_config()
-            # Given AlgorithmConfig is not of the same type as the default config:
-            # This could be the case e.g. if the user is building an algo from a
-            # generic AlgorithmConfig() object.
-            if not isinstance(config, type(default_config)):
-                config = default_config.update_from_dict(config.to_dict())
-            else:
-                config = default_config.from_state(config.get_state())
+            default_config = self.get_default_config()
+            config_state = config.get_state()
+            config = default_config
+            config.set_state(config_state)
 
         # In case this algo is using a generic config (with no algo_class set), set it
         # here.
@@ -2899,7 +2908,7 @@ def get_checkpointable_components(self) -> List[Tuple[str, "Checkpointable"]]:
     @override(Checkpointable)
     def get_ctor_args_and_kwargs(self) -> Tuple[Tuple, Dict[str, Any]]:
+        """Returns the `(args, kwargs)` pair for this object's constructor.
+
+        Returns:
+            A 2-tuple: the positional args, holding a single item (the result
+            of `self.config.get_state()`), and an empty kwargs dict.
+        """
         return (
-            (self.config,),  # *args,
+            (self.config.get_state(),),  # *args,
             {},  # **kwargs
         )
 

@@ -633,12 +633,7 @@ def to_dict(self) -> AlgorithmConfigDict:
             policies_dict = {}
             for policy_id, policy_spec in config.pop("policies").items():
                 if isinstance(policy_spec, PolicySpec):
-                    policies_dict[policy_id] = (
-                        policy_spec.policy_class,
-                        policy_spec.observation_space,
-                        policy_spec.action_space,
-                        policy_spec.config,
-                    )
+                    policies_dict[policy_id] = policy_spec.get_state()
                 else:
                     policies_dict[policy_id] = policy_spec
             config["policies"] = policies_dict
@@ -781,6 +776,53 @@ def update_from_dict(
 
         return self
 
+    def get_state(self) -> Dict[str, Any]:
+        """Returns a dict state that can be pickled.
+
+        The state is a shallow copy of the instance's `__dict__` in which the
+        config's class is stored under the key "class". The `algo_class` and
+        `_is_frozen` attributes are left out.
+
+        Returns:
+            A dictionary containing the attributes of the instance, plus the
+            "class" entry described above.
+        """
+        state = self.__dict__.copy()
+        state["class"] = type(self)
+        # Pop with a default so that an instance on which one of these
+        # attributes was never set does not raise a `KeyError` here.
+        state.pop("algo_class", None)
+        state.pop("_is_frozen", None)
+
+        # Replace each `PolicySpec` in the `policies` dict by its state (see
+        # `PolicySpec.get_state()`); any other entry is passed through as-is.
+        # TODO (simon, sven): Remove when deprecating old stack.
+        policies = state.get("policies")
+        if isinstance(policies, dict):
+            state["policies"] = {
+                policy_id: (
+                    policy_spec.get_state()
+                    if isinstance(policy_spec, PolicySpec)
+                    else policy_spec
+                )
+                for policy_id, policy_spec in policies.items()
+            }
+
+        return state
+
+    @classmethod
+    def from_state(cls, state: Dict[str, Any]) -> "AlgorithmConfig":
+        """Returns a new config instance constructed from the state.
+
+        Args:
+            state: A dictionary containing the state of an `AlgorithmConfig`.
+                See `AlgorithmConfig.get_state` for creating a state. If it
+                has a "class" entry, that class is instantiated; otherwise
+                `cls` is used.
+
+        Returns:
+            An `AlgorithmConfig` instance with attributes from the `state`.
+        """
+        # Fall back to `cls` for states that lack the "class" entry.
+        ctor = state.get("class", cls)
+        config = ctor()
+
+        # "class" is bookkeeping of the state, not a config attribute, so it
+        # must not end up in the instance's `__dict__`.
+        config.__dict__.update(
+            {key: value for key, value in state.items() if key != "class"}
+        )
+
+        return config
+
     # TODO(sven): We might want to have a `deserialize` method as well. Right now,
     #  simply using the from_dict() API works in this same (deserializing) manner,
     #  whether the dict used is actually code-free (already serialized) or not

@@ -124,6 +124,23 @@ def __eq__(self, other: "PolicySpec"):
             and self.config == other.config
         )
 
+    def get_state(self) -> tuple:
+        """Returns the state of this `PolicySpec` as a 4-tuple.
+
+        Returns:
+            The tuple `(policy_class, observation_space, action_space, config)`.
+        """
+        return (
+            self.policy_class,
+            self.observation_space,
+            self.action_space,
+            self.config,
+        )
+
+    @classmethod
+    def from_state(cls, state: Any) -> "PolicySpec":
+        """Builds a `PolicySpec` from a state.
+
+        Args:
+            state: Either the 4-tuple `(policy_class, observation_space,
+                action_space, config)` as returned by `get_state()`, or a dict
+                mapping attribute names to values.
+
+        Returns:
+            A new instance of `cls` carrying the given state.
+        """
+        policy_spec = cls()
+        if isinstance(state, dict):
+            policy_spec.__dict__.update(state)
+        else:
+            # Same field order as in `get_state()`.
+            (
+                policy_spec.policy_class,
+                policy_spec.observation_space,
+                policy_spec.action_space,
+                policy_spec.config,
+            ) = state
+
+        return policy_spec
+
     def serialize(self) -> Dict:
         from ray.rllib.algorithms.registry import get_policy_class_name
 

@@ -424,7 +424,6 @@ def from_checkpoint(
                 "an implementer of the `Checkpointable` API!"
             )
 
-        # Construct an initial object.
         obj = ctor(
             *ctor_info["ctor_args_and_kwargs"][0],
             **ctor_info["ctor_args_and_kwargs"][1],