diff --git a/applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py b/applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py
index a0b1ed1143f0..19907daaff7f 100644
--- a/applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py
+++ b/applications/ColossalEval/colossal_eval/evaluate/gpt_evaluate.py
@@ -670,7 +670,7 @@ def calculate_scores_form_logprobs(logprobs: Dict[str, Any]) -> float:
 def calculate_scores_form_response(response: str, evaluation: Dict[str, Any]) -> int:
     """
     Calculate the score from the response returned by gpt-3.5-turbo or gpt-4.
-    Different from text-davinci-003, this fuction directly calculates the score according to the plain response returned by gpt-3.5-turbo or gpt-4.
+    Different from text-davinci-003, this function directly calculates the score according to the plain response returned by gpt-3.5-turbo or gpt-4.
     Although text-davinci-003 can return log probabilities, it costs ten times as much as gpt-3.5-turbo.
 
     Args:
diff --git a/applications/ColossalMoE/train.py b/applications/ColossalMoE/train.py
index 99603282baf3..850236726a27 100644
--- a/applications/ColossalMoE/train.py
+++ b/applications/ColossalMoE/train.py
@@ -128,13 +128,13 @@ def parse_args():
     parser.add_argument(
         "--comm_overlap",
         action="store_true",
-        help="Use communication overlap for MoE. Recommended to enable for muiti-node training.",
+        help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
     )
     # hierarchical all-to-all
     parser.add_argument(
         "--hierarchical_alltoall",
         action="store_true",
-        help="Use hierarchical all-to-all for MoE. Recommended to enable for muiti-node training.",
+        help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
     )
 
     args = parser.parse_args()
@@ -267,7 +267,7 @@ def main():
             #     ):
             #         coordinator.print_on_master(f"Apply load balance")
             #         apply_load_balance(model, optimizer)
-            # save ckeckpoint
+            # save checkpoint
             if (step + 1) % args.save_interval == 0:
                 coordinator.print_on_master(f"Saving model checkpoint to {args.output_path}")
                 save_checkpoint(
diff --git a/applications/ColossalQA/colossalqa/data_loader/document_loader.py b/applications/ColossalQA/colossalqa/data_loader/document_loader.py
index 4ddbf2b9d249..ca0030621a84 100644
--- a/applications/ColossalQA/colossalqa/data_loader/document_loader.py
+++ b/applications/ColossalQA/colossalqa/data_loader/document_loader.py
@@ -52,7 +52,7 @@ def __init__(self, files: List, **kwargs) -> None:
     def load_data(self, path: str) -> None:
         """
         Load data. Please refer to https://python.langchain.com/docs/modules/data_connection/document_loaders/
-        for sepcific format requirements.
+        for specific format requirements.
         Args:
             path: path to a file
             To load files with glob path, here are some examples.
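Note on the gpt_evaluate.py hunk above: a minimal sketch of what "calculating the score from the plain response" can look like, assuming the judge model replies in prose with the score as a bare integer. The helper name and regex are illustrative, not the repository's actual implementation.

    import re

    def extract_score_from_response(response: str) -> int:
        """Pull the first integer out of a plain-text judge reply.

        Chat models like gpt-3.5-turbo return prose such as "Score: 8",
        so a regex scan suffices; per-token log probabilities are not
        exposed the way they were for text-davinci-003.
        """
        match = re.search(r"\d+", response)
        if match is None:
            raise ValueError(f"No numeric score found in response: {response!r}")
        return int(match.group())

    print(extract_score_from_response("I would rate this answer a 7 out of 10."))  # 7
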
diff --git a/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py b/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py
index 62aead66c54b..483f65e20ea0 100644
--- a/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py
+++ b/applications/ColossalQA/colossalqa/local/colossalcloud_llm.py
@@ -100,7 +100,7 @@ def _call(self, prompt: str, stop=None, **kwargs: Any) -> str:
 
 
     def text_completion(self, prompt, gen_config, auth_config):
-        # Complusory Parameters
+        # Required Parameters
        endpoint = auth_config.pop('endpoint')
        max_new_tokens = gen_config.pop('max_new_tokens')
        # Optional Parameters
diff --git a/applications/ColossalQA/colossalqa/local/llm.py b/applications/ColossalQA/colossalqa/local/llm.py
index 0aa383e9d0b9..4cc82c1a3903 100644
--- a/applications/ColossalQA/colossalqa/local/llm.py
+++ b/applications/ColossalQA/colossalqa/local/llm.py
@@ -33,7 +33,7 @@ class ColossalAPI:
 
     def __init__(self, model_type: str, model_path: str, ckpt_path: str = None) -> None:
         """
-        Configurate model
+        Configure model
         """
         if model_type + model_path + (ckpt_path or "") in ColossalAPI.__instances:
             return
@@ -47,7 +47,7 @@ def __init__(self, model_type: str, model_path: str, ckpt_path: str = None) -> None:
             self.model.load_state_dict(state_dict)
         self.model.to(torch.cuda.current_device())
 
-        # Configurate tokenizer
+        # Configure tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
 
         self.model.eval()
@@ -87,7 +87,7 @@ def generate(self, input: str, **kwargs) -> str:
 
 class VllmAPI:
     def __init__(self, host: str = "localhost", port: int = 8077) -> None:
-        # Configurate api for model served through web
+        # Configure api for model served through web
         self.host = host
         self.port = port
         self.url = f"http://{self.host}:{self.port}/generate"
diff --git a/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py b/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py
index b23058d6dbe3..6e77bb2aee17 100644
--- a/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py
+++ b/applications/ColossalQA/colossalqa/retrieval_conversation_universal.py
@@ -36,7 +36,7 @@ def __init__(
         text_splitter_chunk_overlap=10,
     ) -> None:
         """
-        Warpper for multilingual retrieval qa class (Chinese + English)
+        Wrapper for multilingual retrieval qa class (Chinese + English)
         Args:
             embedding_model_path: local or huggingface embedding model
             embedding_model_device:
diff --git a/applications/ColossalQA/colossalqa/retriever.py b/applications/ColossalQA/colossalqa/retriever.py
index c891cb613bd6..870a73ca5c25 100644
--- a/applications/ColossalQA/colossalqa/retriever.py
+++ b/applications/ColossalQA/colossalqa/retriever.py
@@ -59,7 +59,7 @@ def add_documents(
         Add documents to retriever
         Args:
             docs: the documents to add
-            cleanup: choose from "incremental" (update embeddings, skip existing embeddings) and "full" (destory and rebuild retriever)
+            cleanup: choose from "incremental" (update embeddings, skip existing embeddings) and "full" (destroy and rebuild retriever)
             mode: choose from "by source" (documents are grouped by source) and "merge" (documents are merged into one vector store)
         """
         if cleanup == "full":
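Note on the retriever.py hunk above: a toy sketch of the "incremental" vs. "full" cleanup semantics the docstring describes, using an in-memory dict as a stand-in vector store. The `store` and `embed` names are hypothetical, not the CustomRetriever internals.

    from typing import Dict, List

    # Toy in-memory "vector store": maps a document to its (fake) embedding.
    store: Dict[str, List[float]] = {}

    def embed(doc: str) -> List[float]:
        # Stand-in for a real embedding model.
        return [float(len(doc))]

    def add_documents(docs: List[str], cleanup: str = "incremental") -> None:
        if cleanup == "full":
            # "full": destroy the index and rebuild it from only the new docs.
            store.clear()
            for doc in docs:
                store[doc] = embed(doc)
        elif cleanup == "incremental":
            # "incremental": skip documents whose embeddings already exist.
            for doc in docs:
                if doc not in store:
                    store[doc] = embed(doc)
        else:
            raise ValueError(f"unknown cleanup mode: {cleanup}")

    add_documents(["a", "bb"])
    add_documents(["bb", "ccc"])             # "bb" is skipped, "ccc" is embedded
    add_documents(["dddd"], cleanup="full")  # store now holds only "dddd"
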
diff --git a/applications/ColossalQA/colossalqa/utils.py b/applications/ColossalQA/colossalqa/utils.py
index cd8c3e5acec8..49d99014b372 100644
--- a/applications/ColossalQA/colossalqa/utils.py
+++ b/applications/ColossalQA/colossalqa/utils.py
@@ -49,7 +49,7 @@ def destroy_sql_database(sql_engine: Union[Engine, str]) -> None:
 
 def detect_lang_naive(s):
     """
-    Naive function for language detection, should be replaced by an independant layer
+    Naive function for language detection, should be replaced by an independent layer
     """
     remove_nota = "[’·°–!\"#$%&'()*+,-./:;<=>?@,。?★、…【】()《》?“”‘’![\\]^_`{|}~]+"
     s = re.sub(remove_nota, "", s)
diff --git a/examples/language/openmoe/model/modeling_openmoe.py b/examples/language/openmoe/model/modeling_openmoe.py
index eee3b505a22a..fdd8442f506b 100644
--- a/examples/language/openmoe/model/modeling_openmoe.py
+++ b/examples/language/openmoe/model/modeling_openmoe.py
@@ -96,7 +96,7 @@ def set_openmoe_args(
         load_balance_beam_width (int, optional): Expert load balance search's beam width. Defaults to 8.
         load_balance_group_swap_factor (float, optional): Expert load balance group swap factor. Longer value encourages less swap. Defaults to 0.4.
         enable_kernel (bool, optional): Use kernel optimization. Defaults to False.
-        enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for muiti-node training. Defaults to False.
+        enable_comm_overlap (bool, optional): Use communication overlap for MoE. Recommended to enable for multi-node training. Defaults to False.
         enable_hierarchical_alltoall (bool, optional): Use hierarchical alltoall for MoE. Defaults to False.
     """
     moe_args = dict(
diff --git a/examples/language/openmoe/train.py b/examples/language/openmoe/train.py
index f3267b7c6a68..92f4e066a7a5 100644
--- a/examples/language/openmoe/train.py
+++ b/examples/language/openmoe/train.py
@@ -190,13 +190,13 @@ def parse_args():
     parser.add_argument(
         "--comm_overlap",
         action="store_true",
-        help="Use communication overlap for MoE. Recommended to enable for muiti-node training.",
+        help="Use communication overlap for MoE. Recommended to enable for multi-node training.",
     )
     # hierarchical all-to-all
     parser.add_argument(
         "--hierarchical_alltoall",
         action="store_true",
-        help="Use hierarchical all-to-all for MoE. Recommended to enable for muiti-node training.",
+        help="Use hierarchical all-to-all for MoE. Recommended to enable for multi-node training.",
     )
 
     args = parser.parse_args()
@@ -366,7 +366,7 @@ def main():
             ):
                 coordinator.print_on_master(f"Apply load balance")
                 apply_load_balance(model, optimizer)
-            # save ckeckpoint
+            # save checkpoint
             if (step + 1) % args.save_interval == 0:
                 coordinator.print_on_master(f"Saving model checkpoint to {args.output_path}")
                 booster.save_model(model, args.output_path, shard=True)
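Note on the utils.py hunk above: a minimal sketch of the kind of naive Chinese/English heuristic `detect_lang_naive` implements, classifying by the ratio of CJK characters after stripping punctuation. This is an assumption about the heuristic for illustration, not the repository's actual code.

    import re

    def detect_lang_naive(s: str) -> str:
        # Illustrative only: classify as "zh" or "en" by counting CJK
        # characters; the comment in the diff says this layer should
        # eventually be replaced by an independent one.
        stripped = re.sub(r"[\W\d_]+", "", s)
        if not stripped:
            return "en"
        cjk = sum(1 for ch in stripped if "\u4e00" <= ch <= "\u9fff")
        return "zh" if cjk / len(stripped) > 0.5 else "en"

    print(detect_lang_naive("How are you?"))  # en
    print(detect_lang_naive("你好,世界"))      # zh
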