guanjihuan.com/2025.11.19_modelscope_qwen/run_qwen3.py

"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
"""

from modelscope import AutoModelForCausalLM, AutoTokenizer
import time

class QwenChatbot:
    def __init__(self, model_name="D:/models/Qwen/Qwen3-0.6B"):
        """
        初始化 Qwen 聊天机器人
        :param model_name: 模型路径
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)  # 加载分词器
        self.model = AutoModelForCausalLM.from_pretrained(model_name)  # 加载模型
        self.history = []  # 存储对话历史

    def generate_response(self, user_input):
        """
        生成模型回复
        :param user_input: 用户输入
        :return: 模型回复
        """
        # 将历史对话和当前用户输入组合成消息列表
        messages = self.history + [{"role": "user", "content": user_input}]

        # 使用聊天模板格式化输入文本
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,  # 不进行分词，返回字符串
            add_generation_prompt=True  # 添加生成提示
        )

        # 对文本进行分词，并返回PyTorch张量
        inputs = self.tokenizer(text, return_tensors="pt")

        # 生成回复，最多生成32768个新token
        response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()

        # 解码生成的token，跳过特殊token
        response = self.tokenizer.decode(response_ids, skip_special_tokens=True)

        # 更新对话历史
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": response})

        return response

# 示例使用
if __name__ == "__main__":
    chatbot = QwenChatbot()  # 创建聊天机器人实例

    # 第一次输入（不带/think或/no_think标签，默认启用思考模式）
    start_time = time.time()
    user_input_1 = "计算：1+1"
    print(f"用户: {user_input_1}")
    response_1 = chatbot.generate_response(user_input_1)
    print(f"机器人: {response_1}")
    end_time = time.time()
    print(f"\n--- 分割线（耗时：{end_time-start_time:.2f} 秒） ---\n")

    # 第二次输入带/no_think标签
    start_time = time.time()
    user_input_2 = "确定吗？/no_think"
    print(f"用户: {user_input_2}")
    response_2 = chatbot.generate_response(user_input_2)
    print(f"机器人: {response_2}")
    end_time = time.time()
    print(f"\n--- 分割线（耗时：{end_time-start_time:.2f} 秒） ---\n")

    # 第三次输入带/think标签
    start_time = time.time()
    user_input_3 = "确定吗？/think"
    print(f"用户: {user_input_3}")
    response_3 = chatbot.generate_response(user_input_3)
    print(f"机器人: {response_3}")
    end_time = time.time()
    print(f"\n--- 分割线（耗时：{end_time-start_time:.2f} 秒） ---\n")