Compare commits: fd6f65c3be...master (12 Commits)
| SHA1 |
|---|
| 4986a0d617 |
| a65f06332e |
| b8fea19fc3 |
| fd43e303fa |
| e75bf4d5f4 |
| 2a65f6f639 |
| 579a575087 |
| d1df40c1cd |
| 7bcf9f0c87 |
| 01a35075d2 |
| a75792f64a |
| 639f04868f |
```diff
@@ -1,7 +1,7 @@
 [metadata]
 # replace with your username:
 name = guan
-version = 0.1.188
+version = 0.1.200
 author = guanjihuan
 author_email = guanjihuan@163.com
 description = An open source python package
```
```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: guan
-Version: 0.1.188
+Version: 0.1.200
 Summary: An open source python package
 Home-page: https://py.guanjihuan.com
 Author: guanjihuan
```
```diff
@@ -1,6 +1,6 @@
 # Module: AI_chat
 
-# AI chat
+# AI chat (the current default model is 'hunyuan-lite'; no memory)
 def chat(prompt='你好', model=1, stream=1, stream_label=0):
     import requests
     url = "http://api.guanjihuan.com/chat"
```
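For orientation, a minimal usage sketch of the `chat` helper touched by this hunk; it assumes the `guan` package is installed and the http://api.guanjihuan.com/chat endpoint is reachable:

```python
# Sketch: send one prompt to the free chat endpoint; the reply is streamed
# to stdout and also returned.
import guan

response = guan.chat(prompt='你好')
print(response)
```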
```diff
@@ -27,7 +27,7 @@ def chat(prompt='你好', model=1, stream=1, stream_label=0):
         print('\n--- End Chat Stream Message ---\n')
     return response
 
-# AI chat with the function's source code attached
+# AI chat with the function's source code attached (the current default model is 'hunyuan-lite'; no memory)
 def chat_with_function_code(function_name, prompt='', model=1, stream=1):
     import guan
     function_source = guan.get_source(function_name)
```
```diff
@@ -37,7 +37,7 @@ def chat_with_function_code(function_name, prompt='', model=1, stream=1):
     response = guan.chat(prompt=function_source+'\n\n'+prompt, model=model, stream=stream)
     return response
 
-# Automatic bot-to-bot chat
+# Automatic bot-to-bot chat (the current default model is 'hunyuan-lite'; no memory)
 def auto_chat(prompt='你好', round=2, model=1, stream=1):
     import guan
     response0 = prompt
```
```diff
@@ -48,7 +48,7 @@ def auto_chat(prompt='你好', round=2, model=1, stream=1):
         print('机器人 2: ')
         response0 = guan.chat(prompt=response1, model=model, stream=stream)
 
-# Automatic bot-to-bot chat (guided chat)
+# Automatic bot-to-bot chat (guided chat) (the current default model is 'hunyuan-lite'; no memory)
 def auto_chat_with_guide(prompt='你好', guide_message='(回答字数少于30个字,最后反问我一个问题)', round=5, model=1, stream=1):
     import guan
     response0 = prompt
```
```diff
@@ -59,8 +59,46 @@ def auto_chat_with_guide(prompt='你好', guide_message='(回答字数少于30
         print('机器人 2: ')
         response0 = guan.chat(prompt=response1+guide_message, model=model, stream=stream)
 
-# Chat via LangChain without memory (requires an API Key)
-def langchain_chat_without_memory(prompt="你好", temperature=0.7, system_message=None, print_show=1, load_env=1):
+# Chat via the OpenAI framework (requires an API Key)
+def openai_chat(prompt="你好", model="qwen-plus", temperature=0.7, system_message=None, history=[], print_show=1, load_env=1):
+    import os
+    from openai import OpenAI
+    if load_env:
+        import dotenv
+        from pathlib import Path
+        import inspect
+        caller_frame = inspect.stack()[1]
+        caller_dir = Path(caller_frame.filename).parent
+        env_path = caller_dir / ".env"
+        if env_path.exists():
+            dotenv.load_dotenv(env_path)
+    client = OpenAI(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        base_url=os.getenv("DASHSCOPE_BASE_URL"),
+    )
+    if system_message == None:
+        messages = history+[{"role": "user", "content": prompt}]
+    else:
+        messages = [{"role": "system", "content": system_message}]+history+[{"role": "user", "content": prompt}]
+    completion = client.chat.completions.create(
+        model=model,
+        messages=messages,
+        temperature=temperature,
+        stream=True,
+    )
+    response = ''
+    for chunk in completion:
+        response += chunk.choices[0].delta.content
+        if print_show:
+            print(chunk.choices[0].delta.content, end="", flush=True)
+    if print_show:
+        print()
+    history.append({"role": "user", "content": prompt})
+    history.append({"role": "assistant", "content": response})
+    return response, history
+
+# Load a model via LangChain (requires an API Key)
+def load_langchain_model(model="qwen-plus", temperature=0.7, load_env=1):
     from langchain_openai import ChatOpenAI
     from langchain_core.prompts import ChatPromptTemplate
     import os
```
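A hedged usage sketch for the new `openai_chat` helper: it loads credentials from a `.env` file next to the calling script, streams the completion, and appends the turn to `history`. The `.env` values below are illustrative assumptions, not part of the diff:

```python
# Sketch: two-turn conversation with the new openai_chat helper.
# Assumes a .env file next to this script containing, e.g.:
#   OPENAI_API_KEY=sk-...
#   DASHSCOPE_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
import guan

history = []  # pass an explicit list; the default history=[] is shared between calls
response, history = guan.openai_chat(prompt='你好', model="qwen-plus", history=history)
response, history = guan.openai_chat(prompt='请接着上一句继续说', history=history)
```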
```diff
@@ -76,7 +114,30 @@ def langchain_chat_without_memory(prompt="你好", temperature=0.7, system_messa
     llm = ChatOpenAI(
         api_key=os.getenv("OPENAI_API_KEY"),
         base_url=os.getenv("DASHSCOPE_BASE_URL"),
-        model="qwen-plus",
+        model=model,
+        temperature=temperature,
+        streaming=True,
+    )
+    return llm
+
+# Chat via LangChain without memory (requires an API Key)
+def langchain_chat_without_memory(prompt="你好", model="qwen-plus", temperature=0.7, system_message=None, print_show=1, load_env=1):
+    from langchain_openai import ChatOpenAI
+    from langchain_core.prompts import ChatPromptTemplate
+    import os
+    if load_env:
+        import dotenv
+        from pathlib import Path
+        import inspect
+        caller_frame = inspect.stack()[1]
+        caller_dir = Path(caller_frame.filename).parent
+        env_path = caller_dir / ".env"
+        if env_path.exists():
+            dotenv.load_dotenv(env_path)
+    llm = ChatOpenAI(
+        api_key=os.getenv("OPENAI_API_KEY"),
+        base_url=os.getenv("DASHSCOPE_BASE_URL"),
+        model=model,
         temperature=temperature,
         streaming=True,
     )
```
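A small sketch of the refactor this hunk enables: `load_langchain_model` now returns a configured `ChatOpenAI` instance that can be reused, with the model name parameterized instead of hard-coded:

```python
# Sketch: reuse one LangChain model object across calls.
# ChatOpenAI.invoke accepts a plain string and returns an AIMessage.
import guan

llm = guan.load_langchain_model(model="qwen-plus", temperature=0.2)
print(llm.invoke('用一句话介绍你自己').content)
```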
```diff
@@ -100,7 +161,7 @@ def langchain_chat_without_memory(prompt="你好", temperature=0.7, system_messa
     return response
 
 # Chat via LangChain with memory (memory is kept temporarily on a function attribute; requires an API Key)
-def langchain_chat_with_memory(prompt="你好", temperature=0.7, system_message=None, session_id="default", print_show=1, load_env=1):
+def langchain_chat_with_memory(prompt="你好", model="qwen-plus", temperature=0.7, system_message=None, session_id="default", print_show=1, load_env=1):
     from langchain_openai import ChatOpenAI
     from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
     from langchain_core.runnables.history import RunnableWithMessageHistory
```
```diff
@@ -118,7 +179,7 @@ def langchain_chat_with_memory(prompt="你好", temperature=0.7, system_message=
     llm = ChatOpenAI(
         api_key=os.getenv("OPENAI_API_KEY"),
         base_url=os.getenv("DASHSCOPE_BASE_URL"),
-        model="qwen-plus",
+        model=model,
         temperature=temperature,
         streaming=True,
     )
```
```diff
@@ -157,6 +218,125 @@ def langchain_chat_with_memory(prompt="你好", temperature=0.7, system_message=
         print()
     return response
 
+# Chat via LangChain with tool calling (requires an API Key)
+def langchain_chat_with_tools(prompt="你好", model="qwen-plus", temperature=0.7, system_message=None, tools=None, print_show=1, load_env=1):
+    import guan
+    if tools==None:
+        response = guan.langchain_chat_without_memory(prompt=prompt, model=model, temperature=temperature, system_message=system_message, print_show=print_show, load_env=load_env)
+    else:
+        import os
+        from langchain_openai import ChatOpenAI
+        from langchain_core.prompts import ChatPromptTemplate
+        from langchain.agents import create_openai_tools_agent, AgentExecutor
+        if load_env:
+            import dotenv
+            from pathlib import Path
+            import inspect
+            caller_frame = inspect.stack()[1]
+            caller_dir = Path(caller_frame.filename).parent
+            env_path = caller_dir / ".env"
+            if env_path.exists():
+                dotenv.load_dotenv(env_path)
+        llm = ChatOpenAI(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            base_url=os.getenv("DASHSCOPE_BASE_URL"),
+            model=model,
+            temperature=temperature,
+            streaming=False,
+        )
+        if system_message == None:
+            prompt_template = ChatPromptTemplate.from_messages([
+                ("human", "{input_message}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ])
+        else:
+            prompt_template = ChatPromptTemplate.from_messages([
+                ("system", system_message),
+                ("human", "{input_message}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ])
+        agent = create_openai_tools_agent(llm, tools, prompt_template)
+        agent_executor = AgentExecutor(
+            agent=agent,
+            tools=tools,
+            verbose=bool(print_show),
+            handle_parsing_errors=True,
+        )
+        response_result = agent_executor.invoke({"input_message": prompt})
+        response = response_result["output"]
+        if print_show:
+            print('\n'+response)
+    return response
+
+# Chat via LangChain with tool calling and memory (memory is kept temporarily on a function attribute; requires an API Key)
+def langchain_chat_with_tools_and_memory(prompt="你好", model="qwen-plus", temperature=0.7, system_message=None, tools=None, session_id="default", print_show=1, load_env=1):
+    import guan
+    if tools==None:
+        response = guan.langchain_chat_with_memory(prompt=prompt, model=model, temperature=temperature, system_message=system_message, session_id=session_id, print_show=print_show, load_env=load_env)
+    else:
+        import os
+        from langchain_openai import ChatOpenAI
+        from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
+        from langchain_core.runnables.history import RunnableWithMessageHistory
+        from langchain_community.chat_message_histories import ChatMessageHistory
+        from langchain.agents import create_openai_tools_agent, AgentExecutor
+        if load_env:
+            import dotenv
+            from pathlib import Path
+            import inspect
+            caller_frame = inspect.stack()[1]
+            caller_dir = Path(caller_frame.filename).parent
+            env_path = caller_dir / ".env"
+            if env_path.exists():
+                dotenv.load_dotenv(env_path)
+        llm = ChatOpenAI(
+            api_key=os.getenv("OPENAI_API_KEY"),
+            base_url=os.getenv("DASHSCOPE_BASE_URL"),
+            model=model,
+            temperature=temperature,
+            streaming=False,
+        )
+        if system_message == None:
+            prompt_template = ChatPromptTemplate.from_messages([
+                MessagesPlaceholder("history"),
+                ("human", "{input_message}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ])
+        else:
+            prompt_template = ChatPromptTemplate.from_messages([
+                ("system", system_message),
+                MessagesPlaceholder("history"),
+                ("human", "{input_message}"),
+                ("placeholder", "{agent_scratchpad}"),
+            ])
+
+        if not hasattr(langchain_chat_with_tools_and_memory, "store"):
+            langchain_chat_with_tools_and_memory.store = {}
+
+        def get_session_history(sid: str):
+            if sid not in langchain_chat_with_tools_and_memory.store:
+                langchain_chat_with_tools_and_memory.store[sid] = ChatMessageHistory()
+            return langchain_chat_with_tools_and_memory.store[sid]
+
+        agent = create_openai_tools_agent(llm, tools, prompt_template)
+        agent_executor = AgentExecutor(
+            agent=agent,
+            tools=tools,
+            verbose=bool(print_show),
+            handle_parsing_errors=True,
+        )
+        agent_with_chat_history = RunnableWithMessageHistory(
+            agent_executor,
+            get_session_history,
+            input_messages_key="input_message",
+            history_messages_key="history",
+        )
+        response_result = agent_with_chat_history.invoke({"input_message": prompt}, config={"configurable": {"session_id": session_id}})
+        response = response_result["output"]
+        if print_show:
+            print('\n'+response)
+    return response
+
 # Chat with a local model via Ollama (requires Ollama running and the corresponding model downloaded)
 def ollama_chat(prompt='你好/no_think', model="qwen3:0.6b", temperature=0.8, print_show=1):
     import ollama
```
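A hedged example of the new tool-calling path. The `get_weather` tool below is made up for illustration; `tools` must hold LangChain tool objects, for example functions wrapped with `langchain_core.tools.tool`:

```python
# Sketch: pass a LangChain tool into the new agent-based helper.
# get_weather is a stand-in; any @tool-decorated function works the same way.
from langchain_core.tools import tool
import guan

@tool
def get_weather(city: str) -> str:
    """Return a (fake) weather report for a city."""
    return f"{city}:晴,25摄氏度"

response = guan.langchain_chat_with_tools(prompt='北京今天天气怎么样?', tools=[get_weather])
```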
```diff
@@ -181,14 +361,14 @@ def ollama_chat(prompt='你好/no_think', model="qwen3:0.6b", temperature=0.8, p
         print()
     return response
 
-# Load a local model and tokenizer via ModelScope (load only once)
+# Load a local model and tokenizer via ModelScope (load only once; requires the model files)
 def load_modelscope_model(model_name="D:/models/Qwen/Qwen3-0.6B"):
     from modelscope import AutoModelForCausalLM, AutoTokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name)
     model = AutoModelForCausalLM.from_pretrained(model_name)
     return model, tokenizer
 
-# Chat with a local ModelScope model
+# Chat with a local ModelScope model (requires the model files)
 def modelscope_chat(model, tokenizer, prompt='你好 /no_think', history=[], temperature=0.7, top_p=0.8, print_show=1):
     from threading import Thread
     from transformers import TextIteratorStreamer
```
```diff
@@ -218,13 +398,13 @@ def modelscope_chat(model, tokenizer, prompt='你好 /no_think', history=[], tem
         response += new_text
     if print_show:
         print()
-    new_history = history + [
+    history += [
         {"role": "user", "content": prompt},
         {"role": "assistant", "content": response}
     ]
-    return response, new_history
+    return response, history
 
-# Load a local LLaMA model (load only once)
+# Load a local LLaMA model (load only once; requires the model file)
 def load_llama_model(model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf"):
     from llama_cpp import Llama
     llm = Llama(
```
```diff
@@ -236,11 +416,11 @@ def load_llama_model(model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.
     )
     return llm
 
-# Chat with a local LLaMA model
+# Chat with a local LLaMA model (requires the model file)
 def llama_chat(llm, prompt='你好 /no_think', history=[], temperature=0.7, top_p=0.8, print_show=1):
-    new_history = history + [{"role": "user", "content": prompt}]
+    history += [{"role": "user", "content": prompt}]
     llm_response = llm.create_chat_completion(
-        messages=new_history,
+        messages=history,
         temperature=temperature,
         top_p=top_p,
         repeat_penalty=1.5,
```
```diff
@@ -256,5 +436,5 @@ def llama_chat(llm, prompt='你好 /no_think', history=[], temperature=0.7, top_
             print(token, end="", flush=True)
     if print_show:
         print()
-    new_history.append({"role": "assistant", "content": response})
-    return response, new_history
+    history.append({"role": "assistant", "content": response})
+    return response, history
```
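The `new_history` → `history` change in the last few hunks makes both local-model chat helpers mutate the caller's list in place. A usage sketch; passing an explicit list matters because the default `history=[]` is a shared mutable default that now accumulates turns across calls:

```python
# Sketch: multi-turn chat with a local GGUF model via llama_cpp.
# Assumes llama-cpp-python is installed and the model file exists at model_path.
import guan

llm = guan.load_llama_model(model_path="D:/models/Qwen/Qwen3-0.6B-GGUF/Qwen3-0.6B-Q8_0.gguf")
history = []  # explicit list: the default history=[] is shared between calls
response, history = guan.llama_chat(llm, prompt='你好 /no_think', history=history)
response, history = guan.llama_chat(llm, prompt='请继续 /no_think', history=history)
```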
```diff
@@ -11,6 +11,16 @@ def logging_with_day_and_time(content='', filename='time_logging', file_format='
     else:
         f2.write(datetime_today+' '+datetime_time+' '+str(content)+'\n')
 
+# Get the Unix timestamp at the current point, and print the running time of a code segment
+def record_time_and_print_running_time(start_time=None):
+    import time
+    current_time = time.time()
+    if start_time == None:
+        print("\n--- 开始计时(第一个记录点)---\n")
+    else:
+        print(f"\n--- 自上一个记录点已运行: {current_time - start_time:.2f} 秒 ---\n")
+    return current_time
+
 # Use this function to run another function and get its computation time (seconds)
 def timer(function_name, *args, **kwargs):
     import time
```
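A short sketch of the checkpoint-style timer added in this hunk: the first call starts the clock, and each later call prints the seconds elapsed since the checkpoint it is given:

```python
# Sketch: chain checkpoints to time successive stages of a script.
import guan

t0 = guan.record_time_and_print_running_time()               # first checkpoint
result = sum(i * i for i in range(10**6))                    # stage under test
t1 = guan.record_time_and_print_running_time(start_time=t0)  # prints elapsed seconds
```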
```diff
@@ -80,6 +90,69 @@ def loop_calculation_with_three_parameters(function_name, parameter_array_1, par
         i1 += 1
     return result_array
 
+# Text comparison
+def word_diff(a, b, print_show=1):
+    import difflib
+    import jieba
+    import logging
+    jieba.setLogLevel(logging.ERROR)
+    a_words = jieba.lcut(a)
+    b_words = jieba.lcut(b)
+    sm = difflib.SequenceMatcher(None, a_words, b_words, autojunk=False)
+    result = []
+    for tag, i1, i2, j1, j2 in sm.get_opcodes():
+        if tag == "equal":
+            result.extend(a_words[i1:i2])
+        elif tag == "delete":
+            result.append("\033[9;91m" + "".join(a_words[i1:i2]) + "\033[0m")
+        elif tag == "insert":
+            result.append("\033[92m" + "".join(b_words[j1:j2]) + "\033[0m")
+        elif tag == "replace":
+            result.append("\033[9;91m" + "".join(a_words[i1:i2]) + "\033[0m")
+            result.append(" ")
+            result.append("\033[92m" + "".join(b_words[j1:j2]) + "\033[0m")
+    diff_result = "".join(result)
+    if print_show:
+        print(diff_result)
+    return diff_result
+
+# Text comparison (written to an HTML file)
+def word_diff_to_html(a, b, filename='diff_result', write_file=1):
+    import difflib
+    from html import escape
+    import jieba
+    import logging
+    jieba.setLogLevel(logging.ERROR)
+    a_words = jieba.lcut(a)
+    b_words = jieba.lcut(b)
+    sm = difflib.SequenceMatcher(None, a_words, b_words, autojunk=False)
+    html_parts = []
+    for tag, i1, i2, j1, j2 in sm.get_opcodes():
+        if tag == "equal":
+            html_parts.append("".join(map(escape, a_words[i1:i2])))
+        elif tag == "delete":
+            html_parts.append(f"<span style='background:#e74c3c;color:white;padding:1px 2px;border-radius:2px;text-decoration:line-through;'>"
+                + "".join(map(escape, a_words[i1:i2]))
+                + "</span>")
+        elif tag == "insert":
+            html_parts.append(f"<span style='background:#2ecc71;color:white;padding:1px 2px;border-radius:2px;'>"
+                + "".join(map(escape, b_words[j1:j2]))
+                + "</span>")
+        elif tag == "replace":
+            html_parts.append(f"<span style='background:#e74c3c;color:white;padding:1px 2px;border-radius:2px;text-decoration:line-through;'>"
+                + "".join(map(escape, a_words[i1:i2]))
+                + "</span>")
+            html_parts.append(" ")
+            html_parts.append(f"<span style='background:#2ecc71;color:white;padding:1px 2px;border-radius:2px;'>"
+                + "".join(map(escape, b_words[j1:j2]))
+                + "</span>")
+    diff_result = "".join(html_parts)
+    diff_result = diff_result.replace("\n", "<br>")
+    if write_file:
+        with open(filename+'.html', 'w', encoding='UTF-8') as f:
+            f.write(diff_result)
+    return diff_result
+
 # Print an array
 def print_array(array, line_break=0):
     if line_break == 0:
```
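A usage sketch for the new diff helpers: `jieba` tokenizes the Chinese text first, so `difflib.SequenceMatcher` aligns edits on word boundaries rather than single characters:

```python
# Sketch: compare two sentences; word_diff prints an ANSI-colored diff,
# word_diff_to_html writes the same diff to diff_result.html.
import guan

a = '今天的天气很好,适合出门散步。'
b = '今天的天气不错,适合出门跑步。'
guan.word_diff(a, b)
guan.word_diff_to_html(a, b, filename='diff_result')
```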
```diff
@@ -168,6 +241,42 @@ def standard_deviation_with_formula(data_array):
     std_result = np.sqrt(averaged_squared_data-averaged_data**2)
     return std_result
 
+# Compute the Pearson correlation coefficient with the formula
+def calculate_pearson_correlation(x_array, y_array):
+    import numpy as np
+    mean_x = np.mean(x_array)
+    mean_y = np.mean(y_array)
+    numerator = np.sum((x_array - mean_x) * (y_array - mean_y))
+    sum_sq_x = np.sum((x_array - mean_x) ** 2)
+    sum_sq_y = np.sum((y_array - mean_y) ** 2)
+    denominator = np.sqrt(sum_sq_x * sum_sq_y)
+    correlation = numerator / denominator
+    return correlation
+
+# Compute the Pearson correlation coefficient and p-value with scipy
+def calculate_pearson_correlation_with_scipy(x_array, y_array):
+    import scipy.stats
+    correlation, p_value = scipy.stats.pearsonr(x_array, y_array)
+    return correlation, p_value
+
+# Compute the matrices of Pearson correlation coefficients and p-values for multiple arrays with scipy
+def calculate_correlation_matrix_for_multiple_arrays(multiple_arrays):
+    import scipy.stats
+    import numpy as np
+    num_arrays = len(multiple_arrays)
+    correlation_matrix = np.zeros((num_arrays, num_arrays))
+    p_value_matrix = np.zeros((num_arrays, num_arrays))
+    row_idx = 0
+    for array_1 in multiple_arrays:
+        col_idx = 0
+        for array_2 in multiple_arrays:
+            correlation, p_value = scipy.stats.pearsonr(array_1, array_2)
+            correlation_matrix[row_idx, col_idx] = correlation
+            p_value_matrix[row_idx, col_idx] = p_value
+            col_idx += 1
+        row_idx += 1
+    return correlation_matrix, p_value_matrix
+
 # Get the string between two patterns
 def get_string_between_two_patterns(original_string, start, end, include_start_and_end=0):
     import re
```
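The hand-rolled helper above implements the sample Pearson coefficient $r = \sum_i (x_i-\bar{x})(y_i-\bar{y}) \big/ \sqrt{\sum_i (x_i-\bar{x})^2 \sum_i (y_i-\bar{y})^2}$. A quick consistency check against the scipy version, as a sketch with made-up data:

```python
# Sketch: the formula-based result should match scipy.stats.pearsonr.
import numpy as np
import guan

x = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
y = np.array([1.2, 1.9, 3.1, 3.8, 5.2])
r_formula = guan.calculate_pearson_correlation(x, y)
r_scipy, p_value = guan.calculate_pearson_correlation_with_scipy(x, y)
print(r_formula, r_scipy, p_value)  # the two coefficients agree to floating-point precision
```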
```diff
@@ -274,6 +383,14 @@ def run_programs_sequentially(program_files=['./a.py', './b.py'], execute='pytho
     end = time.time()
     print('Total running time = '+str((end-start)/60)+' min')
 
+# Split text into sentences at "." and "。"
+def split_text_into_sentences(text):
+    import re
+    pattern = r'(?<=[。])|(?<=\.)(?=\s|$)'
+    sentences = re.split(pattern, text)
+    sentence_array = [s.strip() for s in sentences if s.strip()]
+    return sentence_array
+
 # Split text by a fixed character length
 def split_text(text, width=100):
     split_text_list = [text[i:i+width] for i in range(0, len(text), width)]
```
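The new splitter's pattern breaks after every `。`, but after `.` only when whitespace or end-of-string follows, so decimal points survive. A quick sketch:

```python
# Sketch: mixed Chinese/English text; the decimal point in 3.14 is not a split point.
import guan

text = '圆周率约为3.14。This is a sentence. 这是另一句。'
print(guan.split_text_into_sentences(text))
# ['圆周率约为3.14。', 'This is a sentence.', '这是另一句。']
```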
```diff
@@ -471,7 +471,7 @@ def open_file(filename='a', file_format='.txt', mode='add'):
         f = open(filename+file_format, 'w', encoding='UTF-8')
     return f
 
-# Print to a TXT file
+# Print to a TXT file (appends content)
 def print_to_file(*args, filename='print_result', file_format='.txt', print_on=True):
     if print_on==True:
         for arg in args:
```
```diff
@@ -483,7 +483,12 @@ def print_to_file(*args, filename='print_result', file_format='.txt', print_on=T
         f.write('\n')
     f.close()
 
-# Read the content of a text file. Returns an empty string if the file does not exist
+# Write to a text file (overwrites content)
+def write_text_file(content, filename='a', file_format='.txt'):
+    with open(filename+file_format, 'w', encoding='UTF-8') as f:
+        f.write(content)
+
+# Read the content of a text file (returns an empty string if the file does not exist)
 def read_text_file(file_path='./a.txt', make_file=None):
     import os
     if not os.path.exists(file_path):
```
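A sketch contrasting the file helpers after this change, going by the updated comments: `write_text_file` overwrites, while `print_to_file` appends:

```python
# Sketch: write_text_file replaces a.txt's contents; print_to_file appends
# to its own file (per the "(appends content)" comment above).
import guan

guan.write_text_file('第一版内容\n', filename='a')         # a.txt now holds only this text
content = guan.read_text_file('./a.txt')                   # returns '' if the file is missing
guan.print_to_file('补充的一行', filename='print_result')  # appended to print_result.txt
```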
```diff
@@ -28,7 +28,18 @@ def combine_two_pdf_files(input_file_1='a.pdf', input_file_2='b.pdf', output_fil
     with open(output_file, 'wb') as combined_file:
         output_pdf.write(combined_file)
 
-# Convert a PDF file to text with pdfminer3k
+# Convert a PDF file to text with pdfplumber
+def pdf_to_text_with_pdfplumber(pdf_path):
+    import pdfplumber
+    with pdfplumber.open(pdf_path) as pdf:
+        all_text = []
+        for page in pdf.pages:
+            text = page.extract_text()
+            all_text.append(text)
+        content = "\n\n".join(all_text)
+    return content
+
+# Convert a PDF file to text with pdfminer3k (only works with the old pdfminer3k package)
 def pdf_to_text_with_pdfminer3k(pdf_path):
     from pdfminer.pdfparser import PDFParser, PDFDocument
     from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
```
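A hedged sketch for the new pdfplumber-based extractor. One caveat worth knowing: `page.extract_text()` returns `None` for pages with no extractable text (for example scanned images), which would make the `join` fail, so this example assumes a text-based PDF:

```python
# Sketch: extract text from a text-based (non-scanned) PDF.
# Assumes `pip install pdfplumber` and an a.pdf in the working directory.
import guan

content = guan.pdf_to_text_with_pdfplumber('a.pdf')
print(content[:200])
```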
```diff
@@ -115,6 +126,29 @@ def get_links_from_pdf(pdf_path, link_starting_form=''):
                 old = u['/A']['/URI']
     return links
 
+# Convert all text files of the given types in a directory to another encoding; other files are left unchanged
+def convert_file_encoding_for_one_directory(source_directory, target_directory, file_formats=['.m'], src_encoding='utf-8', dst_encoding='gb18030'):
+    import os
+    import shutil
+    os.makedirs(target_directory, exist_ok=True)
+    for root, dirs, files in os.walk(source_directory):
+        rel_path = os.path.relpath(root, source_directory)
+        target_subdir = os.path.join(target_directory, rel_path) if rel_path != '.' else target_directory
+        os.makedirs(target_subdir, exist_ok=True)
+        for file in files:
+            src_file = os.path.join(root, file)
+            dst_file = os.path.join(target_subdir, file)
+            if any(file.lower().endswith(ext.lower()) for ext in file_formats):
+                try:
+                    with open(src_file, 'r', encoding=src_encoding) as f:
+                        content = f.read()
+                    with open(dst_file, 'w', encoding=dst_encoding) as f:
+                        f.write(content)
+                except Exception as e:
+                    shutil.copy2(src_file, dst_file)
+            else:
+                shutil.copy2(src_file, dst_file)
+
 # Get the current date string
 def get_date(bar=True):
     import datetime
```
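A sketch of the new batch re-encoding helper: it mirrors the whole directory tree, transcoding only the listed extensions and copying everything else (including files that fail to decode) unchanged:

```python
# Sketch: convert MATLAB .m files from UTF-8 to GB18030 into a parallel tree.
# The directory names here are illustrative.
import guan

guan.convert_file_encoding_for_one_directory(
    source_directory='./matlab_src',
    target_directory='./matlab_gb18030',
    file_formats=['.m'],
    src_encoding='utf-8',
    dst_encoding='gb18030',
)
```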
```diff
@@ -770,7 +804,7 @@ def creat_qrcode(data="https://www.guanjihuan.com", filename='a', file_format='.
     img = qrcode.make(data)
     img.save(filename+file_format)
 
-# Download papers via the Sci-Hub website
+# Download papers via the Sci-Hub website (this method may no longer work)
 def download_with_scihub(address=None, num=1):
     from bs4 import BeautifulSoup
     import re
```