Add InternLM2 support (#576)
Co-authored-by: Casper <casperbh.96@gmail.com>
Crystalcareai and casper-hansen committed Aug 12, 2024
1 parent 4d5c4a9 commit 6f14fc7
Showing 4 changed files with 79 additions and 0 deletions.
1 change: 1 addition & 0 deletions awq/models/__init__.py
@@ -23,3 +23,4 @@
 from .cohere import CohereAWQForCausalLM
 from .deepseek_v2 import DeepseekV2AWQForCausalLM
 from .minicpm import MiniCPMAWQForCausalLM
+from .internlm2 import InternLM2AWQForCausalLM
1 change: 1 addition & 0 deletions awq/models/auto.py
@@ -33,6 +33,7 @@
"cohere": CohereAWQForCausalLM,
"deepseek_v2": DeepseekV2AWQForCausalLM,
"minicpm": MiniCPMAWQForCausalLM,
"internlm2": InternLM2AWQForCausalLM,
}
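
Context, not part of the diff: this mapping keys AWQ model classes by the Hugging Face config.model_type, so registering "internlm2" here is what lets the auto class resolve InternLM2 checkpoints. A minimal sketch of that dispatch, assuming the dict above is auto.py's AWQ_CAUSAL_LM_MODEL_MAP; the helper below is illustrative, not the library's actual code:

from transformers import AutoConfig

def resolve_awq_class(model_path: str):
    # Illustrative helper: map a checkpoint's model_type to its AWQ wrapper,
    # e.g. "internlm2" -> InternLM2AWQForCausalLM after this commit.
    config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
    return AWQ_CAUSAL_LM_MODEL_MAP[config.model_type]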


1 change: 1 addition & 0 deletions awq/models/base.py
@@ -84,6 +84,7 @@
"cohere": "AutoModelForCausalLM",
"deepseek_v2": "AutoModelForCausalLM",
"minicpm": "AutoModelForCausalLM",
"internlm2": "AutoModelForCausalLM",
}
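
Likewise for context: base.py's table maps model_type to the name of the transformers auto class used to load the unquantized weights. A sketch of how such a string entry is typically resolved, assuming the dict above is named TRANSFORMERS_AUTO_MAPPING as in AutoAWQ's base.py; the checkpoint path is a placeholder:

import transformers

# "internlm2" -> "AutoModelForCausalLM" -> transformers.AutoModelForCausalLM
target_cls = getattr(transformers, TRANSFORMERS_AUTO_MAPPING["internlm2"])
model = target_cls.from_pretrained("internlm/internlm2-chat-7b", trust_remote_code=True)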


76 changes: 76 additions & 0 deletions awq/models/internlm2.py
@@ -0,0 +1,76 @@
import tqdm
from typing import List, Tuple
from .base import BaseAWQForCausalLM


class InternLM2AWQForCausalLM(BaseAWQForCausalLM):
    layer_type = "InternLM2DecoderLayer"
    max_seq_len_key = "max_position_embeddings"

    @staticmethod
    def get_model_layers(model):
        # The stack of InternLM2DecoderLayer blocks that AWQ quantizes.
        return model.model.layers

    @staticmethod
    def get_act_for_scaling(module):
        # Mark the MLP down projection (feed_forward.w2) as scalable.
        return dict(
            is_scalable=True,
            scale_name="feed_forward.w2",
            scale_layer=module.feed_forward.w2,
            scale_shape=module.feed_forward.w2.out_features,
        )

    @staticmethod
    def move_embed(model, device: str):
        # InternLM2 names its embedding table tok_embeddings (vs. embed_tokens in Llama).
        model.model.tok_embeddings = model.model.tok_embeddings.to(device)

    @staticmethod
    def get_layers_for_scaling(module, input_feat, module_kwargs):
        layers = []

        # attention input
        layers.append(
            dict(
                prev_op=module.attention_norm,
                layers=[
                    module.attention.wqkv,
                ],
                inp=input_feat["attention.wqkv"],
                module2inspect=module.attention,
                kwargs=module_kwargs,
            )
        )

        # attention out
        layers.append(
            dict(
                prev_op=module.attention.wqkv,
                layers=[module.attention.wo],
                inp=input_feat["attention.wo"],
            )
        )

        # feed forward input
        layers.append(
            dict(
                prev_op=module.ffn_norm,
                layers=[
                    module.feed_forward.w1,
                    module.feed_forward.w3,
                ],
                inp=input_feat["feed_forward.w1"],
                module2inspect=module.feed_forward,
                kwargs=module_kwargs,
            )
        )

        # feed forward output
        layers.append(
            dict(
                prev_op=module.feed_forward.w1,
                layers=[module.feed_forward.w2],
                inp=input_feat["feed_forward.w2"],
            )
        )

        return layers
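
Usage note, not part of the commit: with the class registered above, an InternLM2 checkpoint can be quantized through AutoAWQ's standard workflow. A minimal sketch; the checkpoint path and quant_config values are placeholders:

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "internlm/internlm2-chat-7b"  # placeholder checkpoint
quant_path = "internlm2-chat-7b-awq"
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}

# Load the FP16 model and tokenizer; InternLM2 requires trust_remote_code.
model = AutoAWQForCausalLM.from_pretrained(model_path, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

# Calibrate and quantize, then persist the AWQ weights.
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)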
