From e4944191271edc8eebd068af9b93ccedb50a074c Mon Sep 17 00:00:00 2001
From: guanjihuan
Date: Wed, 19 Nov 2025 20:08:38 +0800
Subject: [PATCH] update

---
 2025.11.19_modelscope_qwen/download_qwen3.py |  3 +
 .../download_qwen3_GGUF.py                   |  3 +
 2025.11.19_modelscope_qwen/run_qwen3.py      | 79 ++++++++++++++++++
 2025.11.19_modelscope_qwen/run_qwen3_GGUF.py | 80 +++++++++++++++++++
 4 files changed, 165 insertions(+)
 create mode 100644 2025.11.19_modelscope_qwen/download_qwen3.py
 create mode 100644 2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
 create mode 100644 2025.11.19_modelscope_qwen/run_qwen3.py
 create mode 100644 2025.11.19_modelscope_qwen/run_qwen3_GGUF.py

diff --git a/2025.11.19_modelscope_qwen/download_qwen3.py b/2025.11.19_modelscope_qwen/download_qwen3.py
new file mode 100644
index 0000000..1d40bbe
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/download_qwen3.py
@@ -0,0 +1,3 @@
+# Download the model
+from modelscope import snapshot_download
+model_dir = snapshot_download('Qwen/Qwen3-0.6B', local_dir='D:/models/Qwen/Qwen3-0.6B')
\ No newline at end of file
diff --git a/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py b/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
new file mode 100644
index 0000000..582bac4
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
@@ -0,0 +1,3 @@
+# Download the model
+from modelscope import snapshot_download
+model_dir = snapshot_download('Qwen/Qwen3-0.6B-GGUF', local_dir='D:/models/Qwen/Qwen3-0.6B-GGUF')
\ No newline at end of file
diff --git a/2025.11.19_modelscope_qwen/run_qwen3.py b/2025.11.19_modelscope_qwen/run_qwen3.py
new file mode 100644
index 0000000..a4b0c6d
--- /dev/null
+++ b/2025.11.19_modelscope_qwen/run_qwen3.py
@@ -0,0 +1,79 @@
+"""
+This code is supported by the website: https://www.guanjihuan.com
+The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
+"""
+
+from modelscope import AutoModelForCausalLM, AutoTokenizer
+import time
+
+class QwenChatbot:
+    def __init__(self, model_name="D:/models/Qwen/Qwen3-0.6B"):
+        """
+        Initialize the Qwen chatbot
+        :param model_name: model path
+        """
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)  # load the tokenizer
+        self.model = AutoModelForCausalLM.from_pretrained(model_name)  # load the model
+        self.history = []  # conversation history
+
+    def generate_response(self, user_input):
+        """
+        Generate a model reply
+        :param user_input: user input
+        :return: model reply
+        """
+        # Combine the conversation history and the current user input into one message list
+        messages = self.history + [{"role": "user", "content": user_input}]
+
+        # Format the input text with the chat template
+        text = self.tokenizer.apply_chat_template(
+            messages,
+            tokenize=False,  # do not tokenize; return a string
+            add_generation_prompt=True  # append the generation prompt
+        )
+
+        # Tokenize the text and return PyTorch tensors
+        inputs = self.tokenizer(text, return_tensors="pt")
+
+        # Generate a reply of at most 32768 new tokens
+        response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()
+
+        # Decode the generated tokens, skipping special tokens
+        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)
+
+        # Update the conversation history
+        self.history.append({"role": "user", "content": user_input})
+        self.history.append({"role": "assistant", "content": response})
+
+        return response
+
+# Example usage
+if __name__ == "__main__":
+    chatbot = QwenChatbot()  # create a chatbot instance
+
+    # First input (no /think or /no_think tag; thinking mode is enabled by default)
+    start_time = time.time()
+    user_input_1 = "Compute: 1+1"
+    print(f"User: {user_input_1}")
+    response_1 = chatbot.generate_response(user_input_1)
+    print(f"Bot: {response_1}")
+    end_time = time.time()
+    print(f"\n--- Divider (elapsed: {end_time-start_time:.2f} s) ---\n")
+
+    # Second input, with the /no_think tag
+    start_time = time.time()
+    user_input_2 = "Are you sure? /no_think"
print(f"用户: {user_input_2}") + response_2 = chatbot.generate_response(user_input_2) + print(f"机器人: {response_2}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n") + + # 第三次输入带/think标签 + start_time = time.time() + user_input_3 = "确定吗?/think" + print(f"用户: {user_input_3}") + response_3 = chatbot.generate_response(user_input_3) + print(f"机器人: {response_3}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n") \ No newline at end of file diff --git a/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py b/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py new file mode 100644 index 0000000..eaa1570 --- /dev/null +++ b/2025.11.19_modelscope_qwen/run_qwen3_GGUF.py @@ -0,0 +1,80 @@ +""" +This code is supported by the website: https://www.guanjihuan.com +The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066 +""" + +from llama_cpp import Llama +import time + +class QwenChatbotGGUF: + def __init__(self, model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", n_ctx=32768): + """ + 初始化基于 GGUF 的 Qwen3 聊天机器人 + :param model_path: GGUF 模型文件路径(必须是 Qwen3 的 GGUF 文件) + :param n_ctx: 上下文长度(Qwen3 支持长上下文,最大可设 32768) + """ + self.llm = Llama( + model_path=model_path, + n_ctx=n_ctx, + verbose=False, + chat_format="chatml", # Qwen 使用 ChatML 格式 + logits_all=False + ) + self.history = [] + + def generate_response(self, user_input): + """ + 生成模型回复 + :param user_input: 用户输入文本 + :return: 模型回复文本 + """ + # 将当前输入加入历史(role=user) + self.history.append({"role": "user", "content": user_input}) + + # 使用 llama.cpp 内置的 chat completion(自动处理模板) + response = self.llm.create_chat_completion( + messages=self.history, + max_tokens=2048, + temperature=0.6, + top_p=0.95, + repeat_penalty=1.5, + ) + + # 提取助手回复内容 + assistant_message = response["choices"][0]["message"]["content"].strip() + + # 将助手回复加入历史 + self.history.append({"role": "assistant", "content": assistant_message}) + + return assistant_message + +# 示例使用 +if __name__ == "__main__": + chatbot = QwenChatbotGGUF() # 创建聊天机器人实例 + + # 第一次输入 + start_time = time.time() + user_input_1 = "计算:1+1" + print(f"用户: {user_input_1}") + response_1 = chatbot.generate_response(user_input_1) + print(f"机器人: {response_1}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") + + # 第二次输入(带 /no_think) + start_time = time.time() + user_input_2 = "确定吗?/no_think" + print(f"用户: {user_input_2}") + response_2 = chatbot.generate_response(user_input_2) + print(f"机器人: {response_2}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") + + # 第三次输入(带 /think) + start_time = time.time() + user_input_3 = "确定吗?/think" + print(f"用户: {user_input_3}") + response_3 = chatbot.generate_response(user_input_3) + print(f"机器人: {response_3}") + end_time = time.time() + print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n") \ No newline at end of file