From e4944191271edc8eebd068af9b93ccedb50a074c Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Wed, 19 Nov 2025 20:08:38 +0800
Subject: [PATCH] update

---
 2025.11.19_modelscope_qwen/download_qwen3.py |  3 +
 .../download_qwen3_GGUF.py                   |  3 +
 2025.11.19_modelscope_qwen/run_qwen3.py      | 79 ++++++++++++++++++
 2025.11.19_modelscope_qwen/run_qwen3_GGUF.py | 80 +++++++++++++++++++
 4 files changed, 165 insertions(+)
 create mode 100644 2025.11.19_modelscope_qwen/download_qwen3.py
 create mode 100644 2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
 create mode 100644 2025.11.19_modelscope_qwen/run_qwen3.py
 create mode 100644 2025.11.19_modelscope_qwen/run_qwen3_GGUF.py

diff --git a/2025.11.19_modelscope_qwen/download_qwen3.py b/2025.11.19_modelscope_qwen/download_qwen3.py
new file mode 100644
index 0000000..1d40bbe
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/download_qwen3.py
@@ -0,0 +1,3 @@
+# Download the model
+from modelscope import snapshot_download
+model_dir = snapshot_download('Qwen/Qwen3-0.6B', local_dir='D:/models/Qwen/Qwen3-0.6B')
\ No newline at end of file
diff --git a/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py b/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
new file mode 100644
index 0000000..582bac4
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
@@ -0,0 +1,3 @@
+# Download the model
+from modelscope import snapshot_download
+model_dir = snapshot_download('Qwen/Qwen3-0.6B-GGUF', local_dir='D:/models/Qwen/Qwen3-0.6B-GGUF')
\ No newline at end of file
diff --git a/2025.11.19_modelscope_qwen/run_qwen3.py b/2025.11.19_modelscope_qwen/run_qwen3.py
new file mode 100644
index 0000000..a4b0c6d
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/run_qwen3.py
@@ -0,0 +1,79 @@
+"""
+This code is supported by the website: https://www.guanjihuan.com
+The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
+"""
+
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+import time
+
+class QwenChatbot:
+    def __init__(self, model_name="D:/models/Qwen/Qwen3-0.6B"):
+        """
+        Initialize the Qwen chatbot
+        :param model_name: model path
+        """
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)  # load the tokenizer
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)  # load the model
+        self.history = []  # conversation history
+
+    def generate_response(self, user_input):
+        """
+        Generate a model reply
+        :param user_input: user input
+        :return: model reply
+        """
+        # Combine the conversation history and the current user input into one message list
+        messages = self.history + [{"role": "user", "content": user_input}]
+
+        # Format the input text with the chat template
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,  # do not tokenize; return a string
+            add_generation_prompt=True  # append the generation prompt
+        )
+
+        # Tokenize the text and return PyTorch tensors
+        inputs = self.tokenizer(text, return_tensors="pt")
+
+        # Generate a reply of at most 32768 new tokens
+        response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()
+
+        # Decode the generated tokens, skipping special tokens
+        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)
+
+        # Update the conversation history
+        self.history.append({"role": "user", "content": user_input})
+        self.history.append({"role": "assistant", "content": response})
+
+        return response
+
+# Example usage
+if __name__ == "__main__":
+    chatbot = QwenChatbot()  # create a chatbot instance
+
+    # First input (no /think or /no_think tag; thinking mode is enabled by default)
+    start_time = time.time()
+    user_input_1 = "Compute: 1+1"
+    print(f"User: {user_input_1}")
+    response_1 = chatbot.generate_response(user_input_1)
+    print(f"Bot: {response_1}")
+    end_time = time.time()
+    print(f"\n--- Divider (elapsed: {end_time-start_time:.2f} s) ---\n")
+
+    # Second input, with the /no_think tag
+    start_time = time.time()
+    user_input_2 = "Are you sure? /no_think"
print(f"用户: {user_input_2}") + response_2 = chatbot.generate_response(user_input_2) + print(f"机器人: {response_2}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n") + + # 第三次输入带/think标签 + start_time = time.time() + user_input_3 = "确定吗?/think" + print(f"用户: {user_input_3}") + response_3 = chatbot.generate_response(user_input_3) + print(f"机器人: {response_3}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n") \ No newline at end of file diff --git a/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py b/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py new file mode 100644 index 0000000..eaa1570 --- /dev/null +++ b/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py @@ -0,0 +1,80 @@ +""" +This code is supported by the website: https://www.guanjihuan.com +The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066 +""" + +from llama_cpp import Llama +import time + +class QwenChatbotGGUF: + def __init__(self, model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", n_ctx=32768): + """ + 初始化基于 GGUF 的 Qwen3 聊天机器人 + :param model_path: GGUF 模型文件路径(必须是 Qwen3 的 GGUF 文件) + :param n_ctx: 上下文长度(Qwen3 支持长上下文,最大可设 32768) + """ + self.llm = Llama( + model_path=model_path, + n_ctx=n_ctx, + verbose=False, + chat_format="chatml", # Qwen 使用 ChatML 格式 + logits_all=False + ) + self.history = [] + + def generate_response(self, user_input): + """ + 生成模型回复 + :param user_input: 用户输入文本 + :return: 模型回复文本 + """ + # 将当前输入加入历史(role=user) + self.history.append({"role": "user", "content": user_input}) + + # 使用 llama.cpp 内置的 chat completion(自动处理模板) + response = self.llm.create_chat_completion( + messages=self.history, + max_tokens=2048, + temperature=0.6, + top_p=0.95, + repeat_penalty=1.5, + ) + + # 提取助手回复内容 + assistant_message = response["choices"][0]["message"]["content"].strip() + + # 将助手回复加入历史 + self.history.append({"role": "assistant", "content": assistant_message}) + + return assistant_message + +# 示例使用 +if __name__ == "__main__": + chatbot = QwenChatbotGGUF() # 创建聊天机器人实例 + + # 第一次输入 + start_time = time.time() + user_input_1 = "计算:1+1" + print(f"用户: {user_input_1}") + response_1 = chatbot.generate_response(user_input_1) + print(f"机器人: {response_1}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") + + # 第二次输入(带 /no_think) + start_time = time.time() + user_input_2 = "确定吗?/no_think" + print(f"用户: {user_input_2}") + response_2 = chatbot.generate_response(user_input_2) + print(f"机器人: {response_2}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") + + # 第三次输入(带 /think) + start_time = time.time() + user_input_3 = "确定吗?/think" + print(f"用户: {user_input_3}") + response_3 = chatbot.generate_response(user_input_3) + print(f"机器人: {response_3}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") \ No newline at end of file