update
This commit is contained in:
3
2025.11.19_modelscope_qwen/download_qwen3.py
Normal file
3
2025.11.19_modelscope_qwen/download_qwen3.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Download the Qwen3-0.6B checkpoint from ModelScope into a local directory."""
from modelscope import snapshot_download

# Hub identifier and local destination for the full-precision checkpoint.
_MODEL_ID = 'Qwen/Qwen3-0.6B'
_LOCAL_DIR = 'D:/models/Qwen/Qwen3-0.6B'

# snapshot_download fetches every file of the repo and returns the local path.
model_dir = snapshot_download(_MODEL_ID, local_dir=_LOCAL_DIR)
|
||||||
3
2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
Normal file
3
2025.11.19_modelscope_qwen/download_qwen3_GGUF.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""Download the Qwen3-0.6B GGUF quantized weights from ModelScope."""
from modelscope import snapshot_download

# Hub identifier and local destination for the GGUF repository.
_MODEL_ID = 'Qwen/Qwen3-0.6B-GGUF'
_LOCAL_DIR = 'D:/models/Qwen/Qwen3-0.6B-GGUF'

# snapshot_download fetches every file of the repo and returns the local path.
model_dir = snapshot_download(_MODEL_ID, local_dir=_LOCAL_DIR)
|
||||||
79
2025.11.19_modelscope_qwen/run_qwen3.py
Normal file
79
2025.11.19_modelscope_qwen/run_qwen3.py
Normal file
@@ -0,0 +1,79 @@
|
|||||||
|
"""
|
||||||
|
This code is supported by the website: https://www.guanjihuan.com
|
||||||
|
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
|
||||||
|
"""
|
||||||
|
|
||||||
|
from modelscope import AutoModelForCausalLM, AutoTokenizer
|
||||||
|
import time
|
||||||
|
|
||||||
|
class QwenChatbot:
    """Stateful multi-turn chat wrapper around a local Qwen3 model.

    Keeps the full conversation in ``self.history`` so each call to
    :meth:`generate_response` conditions the model on all previous turns.
    """

    def __init__(self, model_name="D:/models/Qwen/Qwen3-0.6B"):
        """Load tokenizer and model and start with an empty conversation.

        :param model_name: local path (or hub id) of the Qwen3 checkpoint
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)  # tokenizer for this checkpoint
        self.model = AutoModelForCausalLM.from_pretrained(model_name)  # causal-LM weights
        self.history = []  # alternating user/assistant message dicts across turns

    def generate_response(self, user_input):
        """Generate one assistant reply and record the exchange in the history.

        :param user_input: the user's message for this turn
        :return: the decoded assistant reply (special tokens stripped)
        """
        # Conversation so far plus the new user turn.
        conversation = self.history + [{"role": "user", "content": user_input}]

        # Render the conversation through the model's chat template as a plain
        # string, appending the assistant-generation prompt at the end.
        prompt_text = self.tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=True,
        )

        # Tokenize the rendered prompt into PyTorch tensors.
        model_inputs = self.tokenizer(prompt_text, return_tensors="pt")

        # Generate up to 32768 new tokens; generate() echoes the prompt, so
        # slice it off before decoding.
        prompt_len = len(model_inputs.input_ids[0])
        generated = self.model.generate(**model_inputs, max_new_tokens=32768)
        new_token_ids = generated[0][prompt_len:].tolist()

        # Decode the freshly generated ids, dropping special tokens.
        response = self.tokenizer.decode(new_token_ids, skip_special_tokens=True)

        # Persist both sides of this exchange for the next turn.
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": response})

        return response
|
||||||
|
|
||||||
|
# Example usage: the three rounds below were copy-pasted in the original;
# they differ only in the prompt string, so drive them from one loop.
if __name__ == "__main__":
    chatbot = QwenChatbot()  # create the chatbot instance

    # Qwen3 toggles its thinking mode with soft switches embedded in the
    # user message: no tag = thinking on by default, /no_think disables it
    # for the turn, /think re-enables it.
    prompts = [
        "计算:1+1",
        "确定吗?/no_think",
        "确定吗?/think",
    ]

    for user_input in prompts:
        start_time = time.time()
        print(f"用户: {user_input}")
        response = chatbot.generate_response(user_input)
        print(f"机器人: {response}")
        end_time = time.time()
        # Separator with per-turn wall-clock time (same format as before).
        print(f"\n--- 分割线(耗时:{end_time-start_time:.2f} 秒) ---\n")
|
||||||
80
2025.11.19_modelscope_qwen/run_qwen3_GGUF.py
Normal file
80
2025.11.19_modelscope_qwen/run_qwen3_GGUF.py
Normal file
@@ -0,0 +1,80 @@
|
|||||||
|
"""
|
||||||
|
This code is supported by the website: https://www.guanjihuan.com
|
||||||
|
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/48066
|
||||||
|
"""
|
||||||
|
|
||||||
|
from llama_cpp import Llama
|
||||||
|
import time
|
||||||
|
|
||||||
|
class QwenChatbotGGUF:
    """Stateful multi-turn chat wrapper around a Qwen3 GGUF model via llama.cpp.

    Conversation state lives in ``self.history``; every call to
    :meth:`generate_response` replays the whole history to the model.
    """

    def __init__(self, model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf", n_ctx=32768):
        """Load the GGUF model and start with an empty conversation.

        :param model_path: path to the Qwen3 GGUF model file
        :param n_ctx: context window length (Qwen3 supports up to 32768)
        """
        self.llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            verbose=False,
            chat_format="chatml",  # Qwen models use the ChatML conversation format
            logits_all=False,
        )
        self.history = []  # alternating user/assistant message dicts

    def generate_response(self, user_input):
        """Generate one assistant reply and record the exchange in the history.

        :param user_input: the user's message for this turn
        :return: the assistant reply text, stripped of surrounding whitespace
        """
        # Append the user turn first so create_chat_completion sees the
        # complete conversation including this message.
        self.history.append({"role": "user", "content": user_input})

        # llama.cpp's chat-completion API applies the chat template itself.
        completion = self.llm.create_chat_completion(
            messages=self.history,
            max_tokens=2048,
            temperature=0.6,
            top_p=0.95,
            repeat_penalty=1.5,
        )

        # Pull the assistant text out of the OpenAI-style response dict.
        reply = completion["choices"][0]["message"]["content"].strip()

        # Persist the assistant turn for the next round.
        self.history.append({"role": "assistant", "content": reply})

        return reply
|
||||||
|
|
||||||
|
# Example usage: the original repeated the same input/print/time code three
# times; the rounds differ only in the prompt string, so loop over them.
if __name__ == "__main__":
    chatbot = QwenChatbotGGUF()  # create the chatbot instance

    # Qwen3 thinking-mode soft switches: no tag = default (thinking on),
    # /no_think disables it for the turn, /think re-enables it.
    prompts = [
        "计算:1+1",
        "确定吗?/no_think",
        "确定吗?/think",
    ]

    for user_input in prompts:
        start_time = time.time()
        print(f"用户: {user_input}")
        response = chatbot.generate_response(user_input)
        print(f"机器人: {response}")
        end_time = time.time()
        # Separator with per-turn wall-clock time (same format as before).
        print(f"\n--- 分割线(耗时:{end_time - start_time:.2f} 秒) ---\n")
|
||||||
Reference in New Issue
Block a user