Merge pull request #6064 from wangbluo/fix_attn
[sp] : fix the attention kernel for sp
wangbluo authored Sep 18, 2024
2 parents 37e3523 + 10e4f7d commit 63314ce
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion colossalai/shardformer/layer/attn.py
@@ -121,7 +121,8 @@ def _dispatch_kernel(dtype: torch.dtype, mask_type: Optional[AttnMaskType], size
             )
 
         if size >= MEMORY_BOUND:
-            ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
+            if isinstance(ColoAttention._flash_kernel_dispatch, KernelLoader):
+                ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
         # lazy load
         if isinstance(ColoAttention._kernel_dispatch_map[dtype][mask_type], KernelLoader):
             ColoAttention._kernel_dispatch_map[dtype][mask_type] = ColoAttention._kernel_dispatch_map[dtype][
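
Note: the added isinstance guard makes the dispatch idempotent: `.load()` is only invoked while `_flash_kernel_dispatch` still holds a `KernelLoader`; once it has been replaced by the loaded kernel, later dispatch calls skip the load. A minimal sketch of this lazy-load guard, with placeholder `KernelLoader` and `FlashAttentionKernel` classes and a hypothetical `memory_bound` threshold standing in for the real colossalai internals:

    # Sketch only: these classes are stand-ins, not the real colossalai.kernel API.

    class FlashAttentionKernel:
        """Placeholder for the loaded kernel object; note it has no load() method."""
        def __call__(self, *args):
            ...

    class KernelLoader:
        """Placeholder loader: load() returns the concrete kernel."""
        def load(self):
            return FlashAttentionKernel()

    class ColoAttention:
        _flash_kernel_dispatch = KernelLoader()  # starts out as a loader

        @staticmethod
        def _dispatch_kernel(size: int, memory_bound: int = 1024):
            if size >= memory_bound:
                # Without this guard, a second call would invoke load() on the
                # already-loaded kernel, which in this sketch raises AttributeError.
                if isinstance(ColoAttention._flash_kernel_dispatch, KernelLoader):
                    ColoAttention._flash_kernel_dispatch = ColoAttention._flash_kernel_dispatch.load()
            return ColoAttention._flash_kernel_dispatch

    # Calling twice is now safe; the loader is swapped for the kernel exactly once.
    ColoAttention._dispatch_kernel(4096)
    ColoAttention._dispatch_kernel(4096)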
