Files
guanjihuan.com/2025.11.19_modelscope_qwen/run_qwen3.py
2025-11-19 20:08:38 +08:00

79 lines
3.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
"""
from modelscope import AutoModelForCausalLM, AutoTokenizer
import time
class QwenChatbot:
def __init__(self, model_name="D:/models/Qwen/Qwen3-0.6B"):
"""
初始化 Qwen 聊天机器人
:param model_name: 模型路径
"""
self.tokenizer = AutoTokenizer.from_pretrained(model_name) # 加载分词器
self.model = AutoModelForCausalLM.from_pretrained(model_name) # 加载模型
self.history = [] # 存储对话历史
def generate_response(self, user_input):
"""
生成模型回复
:param user_input: 用户输入
:return: 模型回复
"""
# 将历史对话和当前用户输入组合成消息列表
messages = self.history + [{"role": "user", "content": user_input}]
# 使用聊天模板格式化输入文本
text = self.tokenizer.apply_chat_template(
messages,
tokenize=False, # 不进行分词,返回字符串
add_generation_prompt=True # 添加生成提示
)
# 对文本进行分词并返回PyTorch张量
inputs = self.tokenizer(text, return_tensors="pt")
# 生成回复最多生成32768个新token
response_ids = self.model.generate(**inputs, max_new_tokens=32768)[0][len(inputs.input_ids[0]):].tolist()
# 解码生成的token跳过特殊token
response = self.tokenizer.decode(response_ids, skip_special_tokens=True)
# 更新对话历史
self.history.append({"role": "user", "content": user_input})
self.history.append({"role": "assistant", "content": response})
return response
# 示例使用
if __name__ == "__main__":
chatbot = QwenChatbot() # 创建聊天机器人实例
# 第一次输入(不带/think或/no_think标签默认启用思考模式
start_time = time.time()
user_input_1 = "计算1+1"
print(f"用户: {user_input_1}")
response_1 = chatbot.generate_response(user_input_1)
print(f"机器人: {response_1}")
end_time = time.time()
print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n")
# 第二次输入带/no_think标签
start_time = time.time()
user_input_2 = "确定吗?/no_think"
print(f"用户: {user_input_2}")
response_2 = chatbot.generate_response(user_input_2)
print(f"机器人: {response_2}")
end_time = time.time()
print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n")
# 第三次输入带/think标签
start_time = time.time()
user_input_3 = "确定吗?/think"
print(f"用户: {user_input_3}")
response_3 = chatbot.generate_response(user_input_3)
print(f"机器人: {response_3}")
end_time = time.time()
print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n")