Orca change default backend of PyTorch estimator to bigdl (#2966)
* change type

* Change default backend of PyTorch estimator to bigdl

* Update PythonInferenceModel.scala

* fix

* update docs
cyita committed Oct 16, 2020
1 parent 2f2f030 commit 520be8d
Showing 7 changed files with 31 additions and 15 deletions.
24 changes: 21 additions & 3 deletions docs/docs/Orca/orca-pytorch-estimator.md
@@ -36,7 +36,7 @@ Estimator.from_torch(*,
use_tqdm=False,
workers_per_node=1,
model_dir=None,
backend="horovod"):
backend="bigdl"):
```
* `model`: PyTorch model if `backend="bigdl"`, PyTorch model creator if `backend="horovod"`
* `optimizer`: bigdl optimizer if `backend="bigdl"`, PyTorch optimizer creator if `backend="horovod"`
@@ -49,7 +49,7 @@ Estimator.from_torch(*,
* `use_tqdm`: parameter for horovod. You can monitor training progress if `use_tqdm=True`.
* `workers_per_node`: parameter for horovod. Worker number on each node. Default: 1.
* `model_dir`: parameter for `bigdl`. The path to save the model. During training, if `checkpoint_trigger` is defined and triggered, the model will be saved to `model_dir`.
-* `backend`: You can choose "horovod" or "bigdl" as backend.
+* `backend`: You can choose "horovod" or "bigdl" as backend. Default: bigdl (see the sketch below).
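For example, a minimal sketch of creating an estimator under each backend; the import path, `model`, `loss_func`, and the `*_creator` functions are assumptions based on this repository's layout and tests, not part of this diff:

```python
# Sketch only; import path assumed from pyzoo/zoo/orca/learn/pytorch/estimator.py.
from zoo.orca.learn.pytorch import Estimator
from bigdl.optim.optimizer import SGD  # a bigdl optimizer, used with backend="bigdl"

# With the new default, backend="bigdl" no longer needs to be spelled out;
# `model` is a plain PyTorch nn.Module and `loss_func` a PyTorch loss (assumed defined).
est = Estimator.from_torch(model=model, loss=loss_func, optimizer=SGD())

# The horovod backend must now be requested explicitly; it takes creator
# functions rather than instances (see the parameter notes above).
est_hvd = Estimator.from_torch(model=model_creator,
                               optimizer=optimizer_creator,
                               loss=loss_creator,
                               backend="horovod")
```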

### Use horovod Estimator
#### **Train model**
@@ -113,7 +113,25 @@ evaluate(self, data, validation_methods=None, batch_size=32)
#### **Get model**
You can get the model using `get_model(self)`, as sketched below.
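A one-line sketch (`est` is an estimator created via `from_torch`, as assumed above):

```python
trained_model = est.get_model()  # returns the underlying trained model
```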


#### **Load model**
You can load a saved model using `load(self, checkpoint, loss=None)`, as in the sketch below.
* `checkpoint`: (str) Path to target checkpoint file.
* `loss`: PyTorch loss function.
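
A short sketch mirroring the test change in this commit, where `temp_dir_name` is the `model_dir` that an earlier `fit` checkpointed into:

```python
# Create a fresh estimator, then restore the latest checkpoint from model_dir.
est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None)
est2.load(temp_dir_name, loss=loss_func)
```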

#### **Clear gradient clipping**
You can clear the gradient clipping parameters using `clear_gradient_clipping(self)`; once cleared, gradient clipping will not be applied.
**Note:** To take effect, this needs to be called before `fit`.

#### **Set constant gradient clipping**
You can set constant gradient clipping for the training process using `set_constant_gradient_clipping(self, min, max)`.
* `min`: The minimum value to clip by.
* `max`: The maximum value to clip by.
**Note:** To take effect, this needs to be called before `fit`.

#### **Clip gradient to a maximum L2-Norm**
You can clip gradients to a maximum L2-norm during the training process using `set_l2_norm_gradient_clipping(self, clip_norm)`.
* `clip_norm`: Gradient L2-Norm threshold.
**Note:** To take effect, this needs to be called before `fit`.
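
A combined sketch of the three clipping calls above; the values are illustrative, and `est`/`data_shard` are the assumed estimator and data from the earlier examples:

```python
# Option A: clip every gradient value into [min, max]; call before fit.
est.set_constant_gradient_clipping(min=-1.0, max=1.0)

# Option B: clip gradients by a global L2-norm threshold instead.
est.set_l2_norm_gradient_clipping(clip_norm=2.0)

# To disable whichever clipping is configured (again, before the next fit):
est.clear_gradient_clipping()

est.fit(data=data_shard, epochs=4, batch_size=2)
```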



(next changed file; file name not captured)
@@ -75,13 +75,11 @@ def transform(df):

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(model=model, loss=loss_func,
-                                            optimizer=SGD(), model_dir=temp_dir_name,
-                                            backend="bigdl")
+                                            optimizer=SGD(), model_dir=temp_dir_name)
            estimator.fit(data=data_shard, epochs=4, batch_size=2, validation_data=data_shard,
                          validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
            estimator.evaluate(data_shard, validation_methods=[Accuracy()], batch_size=2)
-           est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None,
-                                       backend="bigdl")
+           est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None)
            est2.load(temp_dir_name, loss=loss_func)
            est2.fit(data=data_shard, epochs=8, batch_size=2, validation_data=data_shard,
                     validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
(next changed file; file name not captured)
@@ -61,7 +61,7 @@ def forward(self, x):
model = SimpleModel()

estimator = Estimator.from_torch(model=model, loss=nn.BCELoss(),
-                                 optimizer=Adam(), backend="bigdl")
+                                 optimizer=Adam())

def get_dataloader():
    inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
(next changed file; file name not captured)
@@ -38,7 +38,7 @@ def test_train(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        stats1 = estimator.fit(train_data_creator, epochs=5)
        train_loss1 = stats1[-1]["train_loss"]
        validation_loss1 = estimator.evaluate(validation_data_creator)["val_loss"]
@@ -62,7 +62,7 @@ def test_save_and_restore(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        with TemporaryDirectory() as tmp_path:
            estimator1.fit(train_data_creator, epochs=1)
            checkpoint_path = os.path.join(tmp_path, "checkpoint")
@@ -81,7 +81,7 @@ def test_save_and_restore(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        estimator2.load(checkpoint_path)

        model2 = estimator2.get_model()
(next changed file; file name not captured)
@@ -171,7 +171,7 @@ def train_yseq_hvd(workers_per_node, epochs, **config):
                              optimizer=optimizer_creator,
                              loss=loss_creator,
                              workers_per_node=workers_per_node,
-                             config=config)
+                             config=config, backend="horovod")

    stats = estimator.fit(train_data_creator, epochs=epochs)
    for s in stats:
2 changes: 1 addition & 1 deletion pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py
@@ -87,7 +87,7 @@ def train_example(workers_per_node):
        "lr": 1e-2,  # used in optimizer_creator
        "hidden_size": 1,  # used in model_creator
        "batch_size": 4,  # used in data_creator
-       })
+       }, backend="horovod")

    # train 5 epochs
    stats = estimator.fit(train_data_creator, epochs=5)
4 changes: 2 additions & 2 deletions pyzoo/zoo/orca/learn/pytorch/estimator.py
@@ -59,7 +59,7 @@ def from_torch(*,
                   use_tqdm=False,
                   workers_per_node=1,
                   model_dir=None,
-                  backend="horovod"):
+                  backend="bigdl"):
        if backend == "horovod":
            return PyTorchHorovodEstimatorWrapper(model_creator=model,
                                                  optimizer_creator=optimizer,
@@ -253,7 +253,7 @@ def get_model(self):
    def save(self, checkpoint):
        pass

-   def load(self, checkpoint, loss=None, model_dir=None):
+   def load(self, checkpoint, loss=None):
        from zoo.orca.learn.utils import find_latest_checkpoint
        from bigdl.nn.layer import Model
        from bigdl.optim.optimizer import OptimMethod
