
Commit dca9225
padding vocab_size when using pipeline parallelism
flybird11111 committed Mar 7, 2024
1 parent 2c2c3cd commit dca9225
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions colossalai/shardformer/policies/gpt2.py
@@ -33,6 +33,13 @@ def preprocess(self):
             if vocab_size % world_size != 0:
                 new_vocab_size = vocab_size + world_size - vocab_size % world_size
                 self.model.resize_token_embeddings(new_vocab_size)
+        elif self.shard_config.pipeline_stage_manager is not None:
+            # padding vocab_size when using pipeline parallelism
+            new_vocab_size = vocab_size
+            multiple = self.shard_config.make_vocab_size_divisible_by
+            while (new_vocab_size % multiple) != 0:
+                new_vocab_size += 1
+            self.model.resize_token_embeddings(new_vocab_size)
         return self.model

