Orca change default backend of PyTorch estimator to bigdl (#2966)
* change type

* Change default backend of PyTorch estimator to bigdl

* Update PythonInferenceModel.scala

* fix

* update docs
cyita committed Oct 16, 2020
1 parent 2f2f030 commit 520be8d
Showing 7 changed files with 31 additions and 15 deletions.
24 changes: 21 additions & 3 deletions docs/docs/Orca/orca-pytorch-estimator.md
@@ -36,7 +36,7 @@ Estimator.from_torch(*,
use_tqdm=False,
workers_per_node=1,
model_dir=None,
backend="horovod"):
backend="bigdl"):
```
* `model`: PyTorch model if `backend="bigdl"`, PyTorch model creator if `backend="horovod"`
* `optimizer`: bigdl optimizer if `backend="bigdl"`, PyTorch optimizer creator if `backend="horovod"`
@@ -49,7 +49,7 @@ Estimator.from_torch(*,
* `use_tqdm`: parameter for horovod. You can monitor training progress if `use_tqdm=True`.
* `workers_per_node`: parameter for horovod. Worker number on each node. Default: 1.
* `model_dir`: parameter for `bigdl`. The path to save the model. During training, if `checkpoint_trigger` is defined and triggered, the model will be saved to `model_dir`.
-* `backend`: You can choose "horovod" or "bigdl" as backend.
+* `backend`: You can choose "horovod" or "bigdl" as backend. Default: bigdl (see the sketch below).
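For example, a minimal sketch of creating an estimator under each backend; the import path, `model`, `loss_func`, and the `*_creator` functions are assumptions based on this repository's layout and tests, not part of this diff:

```python
# Sketch only; import path assumed from pyzoo/zoo/orca/learn/pytorch/estimator.py.
from zoo.orca.learn.pytorch import Estimator
from bigdl.optim.optimizer import SGD  # a bigdl optimizer, used with backend="bigdl"

# With the new default, backend="bigdl" no longer needs to be spelled out;
# `model` is a plain PyTorch nn.Module and `loss_func` a PyTorch loss (assumed defined).
est = Estimator.from_torch(model=model, loss=loss_func, optimizer=SGD())

# The horovod backend must now be requested explicitly; it takes creator
# functions rather than instances (see the parameter notes above).
est_hvd = Estimator.from_torch(model=model_creator,
                               optimizer=optimizer_creator,
                               loss=loss_creator,
                               backend="horovod")
```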

### Use horovod Estimator
#### **Train model**
@@ -113,7 +113,25 @@ evaluate(self, data, validation_methods=None, batch_size=32)
#### **Get model**
You can get the model using `get_model(self)`, as sketched below.
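A one-line sketch (`est` is an estimator created via `from_torch`, as assumed above):

```python
trained_model = est.get_model()  # returns the underlying trained model
```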


#### **Load model**
You can load a saved model using `load(self, checkpoint, loss=None)`, as in the sketch below.
* `checkpoint`: (str) Path to target checkpoint file.
* `loss`: PyTorch loss function.
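
A short sketch mirroring the test change in this commit, where `temp_dir_name` is the `model_dir` that an earlier `fit` checkpointed into:

```python
# Create a fresh estimator, then restore the latest checkpoint from model_dir.
est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None)
est2.load(temp_dir_name, loss=loss_func)
```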

#### **Clear gradient clipping**
You can clear the gradient clipping parameters using `clear_gradient_clipping(self)`; once cleared, gradient clipping will not be applied.
**Note:** To take effect, this needs to be called before `fit`.

#### **Set constant gradient clipping**
You can set constant gradient clipping for the training process using `set_constant_gradient_clipping(self, min, max)`.
* `min`: The minimum value to clip by.
* `max`: The maximum value to clip by.
**Note:** To take effect, this needs to be called before `fit`.

#### **Clip gradient to a maximum L2-Norm**
You can clip gradients to a maximum L2-norm during the training process using `set_l2_norm_gradient_clipping(self, clip_norm)`.
* `clip_norm`: Gradient L2-Norm threshold.
**Note:** To take effect, this needs to be called before `fit`.
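
A combined sketch of the three clipping calls above; the values are illustrative, and `est`/`data_shard` are the assumed estimator and data from the earlier examples:

```python
# Option A: clip every gradient value into [min, max]; call before fit.
est.set_constant_gradient_clipping(min=-1.0, max=1.0)

# Option B: clip gradients by a global L2-norm threshold instead.
est.set_l2_norm_gradient_clipping(clip_norm=2.0)

# To disable whichever clipping is configured (again, before the next fit):
est.clear_gradient_clipping()

est.fit(data=data_shard, epochs=4, batch_size=2)
```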



(next changed file; file name not captured)
@@ -75,13 +75,11 @@ def transform(df):

        with tempfile.TemporaryDirectory() as temp_dir_name:
            estimator = Estimator.from_torch(model=model, loss=loss_func,
-                                            optimizer=SGD(), model_dir=temp_dir_name,
-                                            backend="bigdl")
+                                            optimizer=SGD(), model_dir=temp_dir_name)
            estimator.fit(data=data_shard, epochs=4, batch_size=2, validation_data=data_shard,
                          validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
            estimator.evaluate(data_shard, validation_methods=[Accuracy()], batch_size=2)
-           est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None,
-                                       backend="bigdl")
+           est2 = Estimator.from_torch(model=model, loss=loss_func, optimizer=None)
            est2.load(temp_dir_name, loss=loss_func)
            est2.fit(data=data_shard, epochs=8, batch_size=2, validation_data=data_shard,
                     validation_methods=[Accuracy()], checkpoint_trigger=EveryEpoch())
(next changed file; file name not captured)
@@ -61,7 +61,7 @@ def forward(self, x):
model = SimpleModel()

estimator = Estimator.from_torch(model=model, loss=nn.BCELoss(),
-                                 optimizer=Adam(), backend="bigdl")
+                                 optimizer=Adam())

def get_dataloader():
    inputs = torch.Tensor([[1, 2], [1, 3], [3, 2], [5, 6], [8, 9], [1, 9]])
(next changed file; file name not captured)
@@ -38,7 +38,7 @@ def test_train(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        stats1 = estimator.fit(train_data_creator, epochs=5)
        train_loss1 = stats1[-1]["train_loss"]
        validation_loss1 = estimator.evaluate(validation_data_creator)["val_loss"]
@@ -62,7 +62,7 @@ def test_save_and_restore(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        with TemporaryDirectory() as tmp_path:
            estimator1.fit(train_data_creator, epochs=1)
            checkpoint_path = os.path.join(tmp_path, "checkpoint")
@@ -81,7 +81,7 @@ def test_save_and_restore(self):
            "lr": 1e-2,  # used in optimizer_creator
            "hidden_size": 1,  # used in model_creator
            "batch_size": 4,  # used in data_creator
-           })
+           }, backend="horovod")
        estimator2.load(checkpoint_path)

        model2 = estimator2.get_model()
(next changed file; file name not captured)
@@ -171,7 +171,7 @@ def train_yseq_hvd(workers_per_node, epochs, **config):
                              optimizer=optimizer_creator,
                              loss=loss_creator,
                              workers_per_node=workers_per_node,
-                             config=config)
+                             config=config, backend="horovod")

    stats = estimator.fit(train_data_creator, epochs=epochs)
    for s in stats:
2 changes: 1 addition & 1 deletion pyzoo/zoo/examples/orca/learn/horovod/pytorch_estimator.py
@@ -87,7 +87,7 @@ def train_example(workers_per_node):
        "lr": 1e-2,  # used in optimizer_creator
        "hidden_size": 1,  # used in model_creator
        "batch_size": 4,  # used in data_creator
-       })
+       }, backend="horovod")

    # train 5 epochs
    stats = estimator.fit(train_data_creator, epochs=5)
4 changes: 2 additions & 2 deletions pyzoo/zoo/orca/learn/pytorch/estimator.py
@@ -59,7 +59,7 @@ def from_torch(*,
                   use_tqdm=False,
                   workers_per_node=1,
                   model_dir=None,
-                  backend="horovod"):
+                  backend="bigdl"):
        if backend == "horovod":
            return PyTorchHorovodEstimatorWrapper(model_creator=model,
                                                  optimizer_creator=optimizer,
@@ -253,7 +253,7 @@ def get_model(self):
    def save(self, checkpoint):
        pass

-   def load(self, checkpoint, loss=None, model_dir=None):
+   def load(self, checkpoint, loss=None):
        from zoo.orca.learn.utils import find_latest_checkpoint
        from bigdl.nn.layer import Model
        from bigdl.optim.optimizer import OptimMethod
