Skip to content

Commit 39faec3

Browse files
cheesewafer, Yu Xia, and chenweize1998
authored
feat: support local llms (#68)
* support local LLMs --------- Co-authored-by: Yu Xia <[email protected]> Co-authored-by: Weize Chen <[email protected]> Co-authored-by: chenweize1998 <[email protected]>
1 parent 53f2ec0 commit 39faec3

File tree

5 files changed

+217
-1
lines changed

5 files changed

+217
-1
lines changed

agentverse/llms/openai.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,12 @@ class OpenAIChatArgs(BaseModelArgs):
9292
# total_tokens=response["usage"]["total_tokens"],
9393
# )
9494

95-
95+
# To support your own local LLMs, register it here and add it into LOCAL_LLMS.
96+
LOCAL_LLMS = ['llama-2-7b-chat-hf']
9697
@llm_registry.register("gpt-35-turbo")
9798
@llm_registry.register("gpt-3.5-turbo")
9899
@llm_registry.register("gpt-4")
100+
@llm_registry.register("llama-2-7b-chat-hf")
99101
class OpenAIChat(BaseChatModel):
100102
args: OpenAIChatArgs = Field(default_factory=OpenAIChatArgs)
101103

@@ -109,6 +111,8 @@ def __init__(self, max_retry: int = 3, **kwargs):
109111
args[k] = kwargs.pop(k, v)
110112
if len(kwargs) > 0:
111113
logging.warning(f"Unused arguments: {kwargs}")
114+
if args['model'] in LOCAL_LLMS:
115+
openai.api_base = "http://localhost:5000/v1"
112116
super().__init__(args=args, max_retry=max_retry)
113117

114118
# def _construct_messages(self, history: List[Message]):
@@ -301,6 +305,7 @@ def get_spend(self) -> int:
301305
"gpt-4": 0.03,
302306
"gpt-4-0613": 0.03,
303307
"gpt-4-32k": 0.06,
308+
"llama-2-7b-chat-hf": 0.0,
304309
}
305310

306311
output_cost_map = {
@@ -311,6 +316,7 @@ def get_spend(self) -> int:
311316
"gpt-4": 0.06,
312317
"gpt-4-0613": 0.06,
313318
"gpt-4-32k": 0.12,
319+
"llama-2-7b-chat-hf": 0.0,
314320
}
315321

316322
model = self.args.model
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
cnt_agents: &cnt_agents 2
2+
max_turn: &max_turn 3
3+
max_inner_turns: &max_inner_turns 3
4+
5+
prompts:
6+
role_assigner_prepend_prompt: &role_assigner_prepend_prompt |-
7+
8+
role_assigner_append_prompt: &role_assigner_append_prompt |-
9+
# Role Description
10+
You are the leader of a group of experts, now you need to recruit a small group of experts with diverse identity to generate coherent and grammatically correct sentences containing the following given words:
11+
${task_description}
12+
13+
You can recruit ${cnt_critic_agents} expert in different fields. What experts will you recruit?
14+
15+
# Response Format Guidance
16+
You should respond with a list of expert description. For example:
17+
1. an electrical engineer specified in the filed of xxx.
18+
2. an economist who is good at xxx.
19+
3. a lawyer with a good knowledge of xxx.
20+
...
21+
22+
Only respond with the description of each role. Do not include your reason.
23+
24+
solver_prepend_prompt: &solver_prepend_prompt |-
25+
You are ${role_description}. Generate a coherent and grammatically correct paragraph containing the following given words (or their variations):
26+
WORDS:
27+
${task_description}
28+
29+
solver_append_prompt: &solver_append_prompt |-
30+
31+
critic_prepend_prompt: &critic_prepend_prompt |-
32+
You are in a discussion group, aiming to generate coherent and grammatically correct sentences containing the following given words (or their variations):
33+
WORDS:
34+
${task_description}
35+
36+
Below is the chat history in your group.
37+
38+
critic_append_prompt: &critic_append_prompt |-
39+
You are ${role_description}. Based on your knowledge, can you check whether the latest provided paragraph contains all the given words or their variations? When responding, you should follow the following rules:
40+
1. If the above latest provided solution has covered all the given words or their variations, end your response with a special token "[Agree]".
41+
1. If not, double-check the above solutions, give your critics, and generate a better solution.
42+
43+
manager_prompt: &manager_prompt |-
44+
45+
executor_prepend_prompt: &executor_prepend_prompt |-
46+
47+
executor_append_prompt: &executor_append_prompt |-
48+
49+
evaluator_prepend_prompt: &evaluator_prepend_prompt |-
50+
51+
evaluator_append_prompt: &evaluator_append_prompt |-
52+
You are a reviewer who checks whether a paragraph contains all the given words (including their variations). When some words are missing, you should patiently point out, and output a score of 0. When the paragraph contains all the words, you should output a score of 1.
53+
54+
WORDS:
55+
${task_description}
56+
57+
SOLUTION:
58+
```
59+
${solution}
60+
```
61+
62+
TEST RESULT:
63+
${result}
64+
65+
RESPONSE FORMAT:
66+
You must respond in the following format:
67+
Score: (0 or 1. 0 if there are some missing words, 1 if there is no missing words)
68+
Advice: (point out all the missing words)
69+
70+
71+
name: pipeline
72+
73+
74+
environment:
75+
env_type: task-basic
76+
max_turn: *max_turn
77+
rule:
78+
role_assigner:
79+
type: role_description
80+
cnt_agents: *cnt_agents
81+
decision_maker:
82+
type: vertical-solver-first
83+
max_inner_turns: *max_inner_turns
84+
executor:
85+
type: coverage-test
86+
evaluator:
87+
type: basic
88+
89+
agents:
90+
- #role_assigner_agent:
91+
agent_type: role_assigner
92+
name: role assigner
93+
max_retry: 1000
94+
prepend_prompt_template: *role_assigner_prepend_prompt
95+
append_prompt_template: *role_assigner_append_prompt
96+
memory:
97+
memory_type: chat_history
98+
llm:
99+
llm_type: llama-2-7b-chat-hf
100+
model: "llama-2-7b-chat-hf"
101+
temperature: 0
102+
max_tokens: 512
103+
output_parser:
104+
type: role_assigner
105+
106+
- #solver_agent:
107+
agent_type: solver
108+
name: Planner
109+
max_retry: 1000
110+
max_history: 4
111+
prepend_prompt_template: *solver_prepend_prompt
112+
append_prompt_template: *solver_append_prompt
113+
memory:
114+
memory_type: chat_history
115+
llm:
116+
llm_type: llama-2-7b-chat-hf
117+
model: "llama-2-7b-chat-hf"
118+
temperature: 0
119+
max_tokens: 1024
120+
output_parser:
121+
type: commongen
122+
# max_tokens: 1024
123+
# stop:
124+
# - "\ndef "
125+
# - "\nclass "
126+
# - "\nif "
127+
# - "\n\n#"
128+
129+
- #critic_agents:
130+
agent_type: critic
131+
name: Critic 1
132+
max_retry: 1000
133+
max_history: 4
134+
role_description: |-
135+
Waiting to be assigned.
136+
prepend_prompt_template: *critic_prepend_prompt
137+
append_prompt_template: *critic_append_prompt
138+
memory:
139+
memory_type: chat_history
140+
llm:
141+
llm_type: llama-2-7b-chat-hf
142+
model: "llama-2-7b-chat-hf"
143+
temperature: 0
144+
max_tokens: 1024
145+
output_parser:
146+
type: mgsm-critic-agree
147+
148+
- #executor_agent:
149+
agent_type: executor
150+
name: Executor
151+
max_retry: 1000
152+
prepend_prompt_template: *executor_prepend_prompt
153+
append_prompt_template: *executor_append_prompt
154+
memory:
155+
memory_type: chat_history
156+
llm:
157+
llm_type: llama-2-7b-chat-hf
158+
model: llama-2-7b-chat-hf
159+
temperature: 0
160+
max_tokens: 1024
161+
output_parser:
162+
type: commongen
163+
164+
- #evaluator_agent:
165+
agent_type: evaluator
166+
name: Evaluator
167+
max_retry: 1000
168+
role_description: |-
169+
Evaluator
170+
prepend_prompt_template: *evaluator_prepend_prompt
171+
append_prompt_template: *evaluator_append_prompt
172+
memory:
173+
memory_type: chat_history
174+
llm:
175+
llm_type: llama-2-7b-chat-hf
176+
model: llama-2-7b-chat-hf
177+
temperature: 0.3
178+
max_tokens: 1024
179+
output_parser:
180+
type: humaneval-evaluator
181+
dimensions:
182+
- Score
183+
184+
- #manager_agent:
185+
agent_type: manager
186+
name: Manager
187+
max_retry: 1000
188+
prompt_template: *manager_prompt
189+
memory:
190+
memory_type: chat_history
191+
llm:
192+
llm_type: llama-2-7b-chat-hf
193+
model: "llama-2-7b-chat-hf"
194+
temperature: 0
195+
max_tokens: 1024
196+
output_parser:
197+
type: humaneval-manager

dataloader/commongen.py

+1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66
@dataloader_registry.register("tasksolving/commongen/gpt-4")
77
@dataloader_registry.register("tasksolving/commongen/gpt-3.5")
8+
@dataloader_registry.register("tasksolving/commongen/llama-2-7b-chat-hf")
89
class CommongenLoader(DataLoader):
910
def __init__(self, path: str):
1011
super().__init__(path)

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,4 @@ colorlog
1717
rapidfuzz
1818
spacy
1919
colorama==0.4.6
20+
fschat[model_worker,webui]

scripts/run_local_model_server.sh

+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
:<<COMMENT
2+
See https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md for more usages.
3+
COMMENT
4+
# export CUDA_VISIBLE_DEVICES=0
5+
MODEL_PATH="path_to_the_downloaded_model_dir"
6+
MODEL_NAME="name_of_the_model"
7+
python3 -m fastchat.serve.controller & \
8+
python3 -m fastchat.serve.multi_model_worker \
9+
--model-path ${MODEL_PATH} \
10+
--model-names ${MODEL_NAME} & \
11+
python3 -m fastchat.serve.openai_api_server --host localhost --port 5000

0 commit comments

Comments (0)