
Commit

padding vocab_size when using pipeline parallelism
flybird11111 committed Mar 7, 2024
1 parent dca9225 commit f646050
Showing 2 changed files with 3 additions and 0 deletions.
2 changes: 2 additions & 0 deletions colossalai/booster/plugin/hybrid_parallel_plugin.py
@@ -961,6 +961,7 @@ def __init__(
        pp_style: str = "1f1b",
        num_model_chunks: int = 1,
        enable_metadata_cache: bool = True,
        make_vocab_size_divisible_by: int = 128,
    ) -> None:
        super().__init__()
        assert (
@@ -1033,6 +1034,7 @@ def __init__(
            enable_jit_fused=self.enable_jit_fused,
            enable_sequence_parallelism=enable_sequence_parallelism,
            enable_sequence_overlap=enable_sequence_overlap,
            make_vocab_size_divisible_by=make_vocab_size_divisible_by,
        )
        self.amp_config = dict(
            initial_scale=initial_scale,
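
For context, a minimal usage sketch of the new option (illustrative only; the tp_size/pp_size values are placeholders and not part of this commit):

from colossalai.booster import Booster
from colossalai.booster.plugin import HybridParallelPlugin

# Hypothetical configuration: only make_vocab_size_divisible_by is introduced by this commit.
plugin = HybridParallelPlugin(
    tp_size=2,   # tensor parallel degree (placeholder)
    pp_size=2,   # pipeline parallel degree (placeholder)
    make_vocab_size_divisible_by=128,  # pad vocab_size up to a multiple of 128
)
booster = Booster(plugin=plugin)

Padding the vocabulary to a fixed multiple presumably keeps the embedding and LM-head weights a uniform, evenly divisible size across parallel ranks and pipeline stages.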
1 change: 1 addition & 0 deletions colossalai/shardformer/shard/shard_config.py
@@ -35,6 +35,7 @@ class ShardConfig:
    enable_sequence_parallelism: bool = False
    enable_sequence_overlap: bool = False
    extra_kwargs: Dict[str, Any] = field(default_factory=dict)
    make_vocab_size_divisible_by: int = 128
    # pipeline_parallel_size: int
    # data_parallel_size: int
    # tensor_parallel_mode: Literal['1d', '2d', '2.5d', '3d']
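
The padding itself is simple round-up arithmetic; a minimal sketch assuming vocab_size is rounded up to the next multiple of make_vocab_size_divisible_by (the helper name pad_vocab_size is hypothetical, not the function this commit touches):

def pad_vocab_size(vocab_size: int, make_vocab_size_divisible_by: int = 128) -> int:
    # Round vocab_size up to the nearest multiple of make_vocab_size_divisible_by
    # so embedding / LM-head rows split evenly across parallel ranks.
    multiple = make_vocab_size_divisible_by
    return ((vocab_size + multiple - 1) // multiple) * multiple

# Example: a 50257-token vocabulary (GPT-2) is padded to 50304 = 393 * 128.
assert pad_vocab_size(50257, 128) == 50304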
