Add impersonate feature to API /v1/chat/completions #6342

Open · wants to merge 4 commits into base: dev
29 changes: 23 additions & 6 deletions extensions/openai/completions.py
@@ -16,6 +16,7 @@
 from extensions.openai.utils import debug_msg
 from modules import shared
 from modules.chat import (
+    get_stopping_strings,
     generate_chat_prompt,
     generate_chat_reply,
     load_character_memoized,
@@ -242,6 +243,9 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
     # generation parameters
     generate_params = process_parameters(body, is_legacy=is_legacy)
     continue_ = body['continue_']
+    impersonate = body['impersonate']
+    if impersonate:
+        continue_ = False  # When impersonating, continue_ must be False; see impersonate_wrapper in chat.py
 
     # Instruction template
     if body['instruction_template_str']:
@@ -294,6 +298,7 @@ def chat_streaming_chunk(content):
 
     def chat_streaming_chunk(content):
         # begin streaming
+        role = 'user' if impersonate else 'assistant'
         chunk = {
             "id": cmpl_id,
             "object": object_type,
@@ -302,7 +307,7 @@ def chat_streaming_chunk(content):
             resp_list: [{
                 "index": 0,
                 "finish_reason": None,
-                "delta": {'role': 'assistant', 'content': content},
+                "delta": {'role': role, 'content': content},
             }],
         }
 
@@ -314,7 +319,9 @@ def chat_streaming_chunk(content):
         return chunk
 
     # generate reply #######################################
-    prompt = generate_chat_prompt(user_input, generate_params, _continue=continue_)
+    prompt = generate_chat_prompt(user_input, generate_params, _continue=continue_, impersonate=impersonate)
+    if impersonate:
+        prompt += user_input
     if prompt_only:
         yield {'prompt': prompt}
         return
@@ -324,14 +331,23 @@ def chat_streaming_chunk(content):
     if stream:
         yield chat_streaming_chunk('')
 
-    generator = generate_chat_reply(
-        user_input, generate_params, regenerate=False, _continue=continue_, loading_message=False)
+    if impersonate:
+        stopping_strings = get_stopping_strings(generate_params)
+        generator = generate_reply(prompt, generate_params, stopping_strings=stopping_strings, is_chat=True)
+    else:
+        generator = generate_chat_reply(
+            user_input, generate_params, regenerate=False, _continue=continue_, loading_message=False)
 
     answer = ''
     seen_content = ''
 
     for a in generator:
-        answer = a['internal'][-1][1]
+        if impersonate:
+            # generate_chat_reply returns the entire message so far, while generate_reply yields only the new text,
+            # so prepend user_input to keep the output consistent.
+            answer = user_input + a
+        else:
+            answer = a['internal'][-1][1]
         if stream:
             len_seen = len(seen_content)
             new_content = answer[len_seen:]
@@ -360,6 +376,7 @@ def chat_streaming_chunk(content):
 
             yield chunk
     else:
+        role = 'user' if impersonate else 'assistant'
         resp = {
             "id": cmpl_id,
             "object": object_type,
@@ -368,7 +385,7 @@ def chat_streaming_chunk(content):
             resp_list: [{
                 "index": 0,
                 "finish_reason": stop_reason,
-                "message": {"role": "assistant", "content": answer}
+                "message": {"role": role, "content": answer}
             }],
             "usage": {
                 "prompt_tokens": token_count,
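Taken together, the completions.py changes route an impersonate request through generate_reply on the raw prompt (with the last user message appended) instead of generate_chat_reply, and attribute the result to the user role. A minimal client sketch of the non-streaming path, assuming a local server at http://127.0.0.1:5000 (the address and message contents are illustrative; only the impersonate flag comes from this PR):

```python
import requests

URL = "http://127.0.0.1:5000/v1/chat/completions"  # assumed default local address

payload = {
    "messages": [
        {"role": "assistant", "content": "How can I help you today?"},
        {"role": "user", "content": "I was wondering"},  # partial user message to continue
    ],
    "impersonate": True,  # new flag from this PR; continue_ is forced to False server-side
}

message = requests.post(URL, json=payload).json()["choices"][0]["message"]

# With impersonate=True the completion is attributed to the user, not the assistant,
# and the content starts with the original user text followed by the continuation.
assert message["role"] == "user"
print(message["content"])
```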
2 changes: 2 additions & 0 deletions extensions/openai/typing.py
@@ -114,6 +114,8 @@ class ChatCompletionRequestParams(BaseModel):
 
     continue_: bool = Field(default=False, description="Makes the last bot message in the history be continued instead of starting a new message.")
 
+    impersonate: bool = Field(default=False, description="Impersonate the user in the chat. Makes the model continue generating the last user message.")
+
 
 class ChatCompletionRequest(GenerationOptions, ChatCompletionRequestParams):
     pass
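Because the new field defaults to False, existing clients are unaffected; only requests that explicitly send impersonate take the new code path. When streaming, the role switch appears in every delta chunk. A rough sketch of consuming the stream (same assumed local address as above; the SSE parsing is simplified and the [DONE] sentinel is assumed to follow the usual OpenAI-compatible convention):

```python
import json
import requests

URL = "http://127.0.0.1:5000/v1/chat/completions"  # assumed default local address

body = {
    "messages": [{"role": "user", "content": "Let me explain my situation:"}],
    "impersonate": True,
    "stream": True,
}

with requests.post(URL, json=body, stream=True) as response:
    for line in response.iter_lines():
        if not line.startswith(b"data: "):
            continue  # skip keep-alives and blank lines
        data = line[len(b"data: "):]
        if data.strip() == b"[DONE]":
            break
        delta = json.loads(data)["choices"][0]["delta"]
        # Each streamed delta carries role 'user' when impersonating.
        print(delta.get("content", ""), end="", flush=True)
```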