@@ -1,25 +1,49 @@
# Module: data_processing
# Module: data_processing
# 模型 对话
# AI 对话
def chat ( prompt = ' 你好 ' , model = 1 , stream = 0 , top_p = 0.8 , temperature = 0.85 ) :
def chat ( prompt = ' 你好 ' , stream = 1 , model = 1 , top_p = 0.8 , temperature = 0.85 ) :
import socket
import socket
import json
import json
import time
import guan
with socket . socket ( socket . AF_INET , socket . SOCK_STREAM ) as client_socket :
with socket . socket ( socket . AF_INET , socket . SOCK_STREAM ) as client_socket :
client_socket . settimeout ( 30 )
client_socket . settimeout ( 30 )
client_socket . connect ( ( ' socket.guanjihuan.com ' , 12345 ) )
client_socket . connect ( ( ' socket.guanjihuan.com ' , 12345 ) )
message = {
split_text_list = guan . split_text ( prompt , width = 100 )
' server ' : " chat.guanjihuan.com " ,
message_times = len ( split_text_list )
' prompt ' : prompt ,
if message_times == 1 or message_times == 0 :
' model ' : model ,
message = {
' top_p ' : top_p ,
' server ' : " chat.guanjihuan.com " ,
' temperature ' : temperature ,
' prompt ' : prompt ,
}
' model ' : model ,
send_message = json . dumps ( message )
' top_p ' : top_p ,
client_socket . send ( send_message . encode ( ' utf-8 ' ) )
' temperature ' : temperature ,
}
send_message = json . dumps ( message )
client_socket . send ( send_message . encode ( ' utf-8 ' ) )
else :
end_message = 0
for i0 in range ( message_times ) :
if i0 == message_times - 1 :
end_message = 1
prompt_0 = split_text_list [ i0 ]
message = {
' server ' : " chat.guanjihuan.com " ,
' prompt ' : prompt_0 ,
' model ' : model ,
' top_p ' : top_p ,
' temperature ' : temperature ,
' end_message ' : end_message ,
}
send_message = json . dumps ( message )
client_socket . send ( send_message . encode ( ' utf-8 ' ) )
time . sleep ( 0.2 )
if stream == 1 :
if stream == 1 :
print ( ' \n --- Begin Stream Message --- \n ' )
print ( ' \n --- Begin Chat Stream Message --- \n ' )
response = ' '
response = ' '
while True :
while True :
if prompt == ' ' :
break
try :
try :
data = client_socket . recv ( 1024 )
data = client_socket . recv ( 1024 )
if data != b ' ' :
if data != b ' ' :
@@ -32,48 +56,109 @@ def chat(prompt='你好', model=1, stream=0, top_p=0.8, temperature=0.85):
break
break
else :
else :
if stream == 1 :
if stream == 1 :
print ( stream_response )
print ( stream_response , end = ' ' , flush = True )
except :
except :
break
break
client_socket . close ( )
client_socket . close ( )
if stream == 1 :
if stream == 1 :
print ( ' \n --- End Stream Message --- \n ' )
print ( ' \n \n --- End Chat Stream Message --- \n ' )
return response
return response
# AI chat with a function's source code placed in front of the prompt
def chat_with_function_code(function_name, prompt=' ', stream=1, model=1, top_p=0.8, temperature=0.85):
    """Send a function's source code, optionally followed by extra text, to ``guan.chat``.

    Args:
        function_name: Object passed to ``guan.get_source`` to obtain the source text
            (presumably the function object itself rather than a string; confirm
            against ``guan.get_source``).
        prompt: Extra text appended after the source code. When it equals the
            default value, only the source code is sent.
        stream, model, top_p, temperature: Forwarded unchanged to ``guan.chat``.

    Returns:
        The value returned by ``guan.chat``.
    """
    import guan
    source_code = guan.get_source(function_name)
    # The default prompt means "no extra question": send the source on its own.
    full_prompt = source_code if prompt == ' ' else source_code + ' \n \n ' + prompt
    return guan.chat(prompt=full_prompt, stream=stream, model=model, top_p=top_p, temperature=temperature)
# Automatic conversation between two chat bots
def auto_chat(prompt=' 你好 ', round=2):
    """Let two ``guan.chat`` sessions talk to each other for a number of rounds.

    Args:
        prompt: Opening message given to the first bot.
        round: Number of rounds; each round is one reply from bot 1 followed by
            one reply from bot 2. (The name shadows the builtin ``round`` inside
            this function; it is kept because it is part of the call signature.)

    Returns:
        None. The round headers and speaker labels are printed here, and both
        ``guan.chat`` calls are made with ``stream=1``.
    """
    import guan
    incoming = prompt
    for i0 in range(round):
        print(f' 【对话第 { i0 + 1 } 轮】 \n ')
        print(' 机器人 1: ')
        reply_from_bot1 = guan.chat(prompt=incoming, stream=1)
        print(' 机器人 2: ')
        # Bot 2's answer becomes bot 1's input in the next round.
        incoming = guan.chat(prompt=reply_from_bot1, stream=1)
# Automatic conversation between two chat bots (guided conversation)
def auto_chat_with_guide(prompt=' 你好 ', guide_message=' ( 回答字数少于30个字, 最后反问我一个问题) ', round=5):
    """Let two ``guan.chat`` sessions talk to each other, steering every turn.

    Args:
        prompt: Opening message given to the first bot.
        guide_message: Text appended to every message before it is sent, so the
            same instruction accompanies each turn.
        round: Number of rounds; each round is one reply from bot 1 followed by
            one reply from bot 2. (The name shadows the builtin ``round`` inside
            this function; it is kept because it is part of the call signature.)

    Returns:
        None. The round headers and speaker labels are printed here, and both
        ``guan.chat`` calls are made with ``stream=1``.
    """
    import guan
    incoming = prompt
    for i0 in range(round):
        print(f' 【对话第 { i0 + 1 } 轮】 \n ')
        print(' 机器人 1: ')
        reply_from_bot1 = guan.chat(prompt=incoming + guide_message, stream=1)
        print(' 机器人 2: ')
        # Bot 2's answer (without the guide text) is carried into the next round.
        incoming = guan.chat(prompt=reply_from_bot1 + guide_message, stream=1)
# Run a function on the cloud server (the function must be standalone, runnable code)
# Run a function on the cloud server (the function must be standalone, runnable code)
# NOTE(review): this span looks like two revisions of `run` interleaved line by line
# (most statements appear twice, and both `return_data = ...` and `return_data + = ...`
# forms are present). Confirm which lines are current before changing any logic.
def run ( function_name , * args , * * kwargs ) :
def run ( function_name , * args , * * kwargs ) :
import socket
import socket
import json
import json
import pickle
import base64
import time
import guan
import guan
with socket . socket ( socket . AF_INET , socket . SOCK_STREAM ) as client_socket :
with socket . socket ( socket . AF_INET , socket . SOCK_STREAM ) as client_socket :
# NOTE(review): two different host strings appear on the next two lines — confirm the intended one.
client_socket . connect ( ( ' socket .guanjihuan.com' , 12345 ) )
client_socket . connect ( ( ' run .guanjihuan.com' , 12345 ) )
# The function's source text (from guan.get_source) is what gets sent to the server.
function_source = guan . get_source ( function_name )
function_source = guan . get_source ( function_name )
message = {
# The source is cut into pieces with guan.split_text(width=100); zero or one piece is
# sent as a single message, more pieces are sent as a sequence (see the else branch).
split_text_list = guan . split_text ( function_source , width = 100 )
' server ' : " run " ,
message_times = len ( split_text_list )
' function_name ' : function_name . __name__ ,
if message_times == 1 or message_times == 0 :
' function_source ' : function_source ,
message = {
' args ' : str ( args ) ,
' server ' : " run.guanjihuan.com " ,
' kwargs ' : str ( kwargs )
' function_name ' : function_name . __name__ ,
}
' function_source ' : function_source ,
send_message = json . dumps ( message )
' args ' : str ( args ) ,
client_socket . send ( send_message . encode ( ) )
' kwargs ' : str ( kwargs )
return_data = None
}
send_message = json . dumps ( message )
client_socket . send ( send_message . encode ( ) )
else :
# end_message stays 0 for every piece except the last one, which is flagged with 1.
end_message = 0
for i0 in range ( message_times ) :
if i0 == message_times - 1 :
end_message = 1
source_0 = split_text_list [ i0 ]
message = {
' server ' : " run " ,
' function_name ' : function_name . __name__ ,
' function_source ' : source_0 ,
' args ' : str ( args ) ,
' kwargs ' : str ( kwargs ) ,
' end_message ' : end_message ,
}
send_message = json . dumps ( message )
client_socket . send ( send_message . encode ( ) )
# Pause 0.2 s after each piece; presumably so the server receives the pieces as
# separate messages — TODO confirm against the server implementation.
time . sleep ( 0.2 )
print ( ' \n guan.run: 云端服务器正在计算,请等待返回结果。 \n ' )
return_data = ' '
print_data = ' '
# Receive loop: each recv() of up to 1024 bytes is decoded and parsed as one JSON
# document carrying 'return_data', 'print_data' and 'end_message'.
while True :
while True :
try :
try :
data = client_socket . recv ( 1024 )
data = client_socket . recv ( 1024 )
return_text = data . decode ( )
return_text = data . decode ( )
return_dict = json . loads ( return_text )
return_dict = json . loads ( return_text )
return_data = return_dict [ ' return_data ' ]
return_data + = return_dict [ ' return_data ' ]
print_data = return_dict [ ' print_data ' ]
print_data + = return_dict [ ' print_data ' ]
end_message = return_dict [ ' end_message ' ]
end_message = return_dict [ ' end_message ' ]
if print_data ! = ' ' :
if end_message = = 1 :
print ( ' --- Start Print --- \n ' )
print ( print_data )
print ( ' --- End Print --- \n ' )
if end_message == 1 or return_text == ' ' :
break
break
# NOTE(review): the bare except ends the loop on any error (decode failure, invalid
# JSON, missing key, socket error) without reporting it.
except :
except :
break
break
if print_data != ' ' :
print ( ' --- Start Print --- \n ' )
print ( print_data )
print ( ' --- End Print --- \n ' )
print ( ' guan.run: 云端服务器计算结束,以上是打印结果。 \n ' )
else :
print ( ' guan.run: 云端服务器计算结束。 \n ' )
# NOTE(review): pickle.loads on data received over the network can execute arbitrary
# code during unpickling; this is only safe if the server is fully trusted.
return_data = pickle . loads ( base64 . b64decode ( return_data ) )
client_socket . close ( )
client_socket . close ( )
return return_data
return return_data
@@ -303,18 +388,23 @@ def print_array_with_index(array, show_index=1, index_type=0):
index + = 1
index + = 1
print ( index , i0 )
print ( index , i0 )
# Split text into consecutive pieces of a fixed character length
def split_text(text, width=100):
    """Cut ``text`` into consecutive slices of at most ``width`` characters.

    Every character is kept (including spaces and line breaks), so joining the
    returned pieces reproduces ``text`` exactly.

    Args:
        text: The string (or other sliceable sequence) to split.
        width: Maximum length of each piece; must be at least 1.

    Returns:
        A list of pieces in order; the last one may be shorter than ``width``.
        An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``width`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop all of the
    # text; reject it the same way a zero step is already rejected by range().
    if width < 1:
        raise ValueError(f'width must be a positive integer, got {width!r}')
    return [text[start:start + width] for start in range(0, len(text), width)]
# Split text by character length using textwrap (line widths are adjusted slightly
# automatically, but line breaks and spaces can be lost)
def split_text_with_textwrap(text, width=100):
    """Wrap ``text`` into lines of at most ``width`` characters with ``textwrap``.

    Args:
        text: The string to wrap.
        width: Maximum line length handed to ``textwrap``.

    Returns:
        The list of wrapped lines produced by ``textwrap``; with its default
        settings whitespace is normalised, so the original text cannot always
        be rebuilt from the pieces.
    """
    import textwrap
    wrapper = textwrap.TextWrapper(width=width)
    return wrapper.wrap(text)
# Segment text into words with the jieba package
def divide_text_into_words(text):
    """Return the result of ``jieba.lcut`` applied to ``text``.

    Args:
        text: The text to segment.

    Returns:
        Whatever ``jieba.lcut(text)`` returns (the segmented words).
    """
    import jieba
    return jieba.lcut(text)
# Split text according to a given character length
# NOTE(review): another `split_text` taking a `width` parameter is defined earlier in
# this source; confirm which of the two definitions is meant to remain.
def split_text(text, wrap_width=3000):
    """Wrap ``text`` into lines of at most ``wrap_width`` characters with ``textwrap``.

    Args:
        text: The string to wrap.
        wrap_width: Maximum line length handed to ``textwrap.wrap``.

    Returns:
        The list of wrapped lines returned by ``textwrap.wrap``.
    """
    import textwrap
    return textwrap.wrap(text, width=wrap_width)
# 判断某个字符是中文还是英文或其他
# 判断某个字符是中文还是英文或其他
def check_Chinese_or_English ( a ) :
def check_Chinese_or_English ( a ) :
if ' \u4e00 ' < = a < = ' \u9fff ' :
if ' \u4e00 ' < = a < = ' \u9fff ' :
@@ -1197,25 +1287,28 @@ def convert_wordpress_xml_to_markdown(xml_file='./a.xml', convert_content=1, rep
title = item . find ( ' title ' ) . text
title = item . find ( ' title ' ) . text
content = item . find ( ' .//content:encoded ' , namespaces = { ' content ' : ' http://purl.org/rss/1.0/modules/content/ ' } ) . text
content = item . find ( ' .//content:encoded ' , namespaces = { ' content ' : ' http://purl.org/rss/1.0/modules/content/ ' } ) . text
if convert_content == 1 :
if convert_content == 1 :
content = re . sub ( r ' <!--.*?--> ' , ' ' , content )
try :
content = content . replace ( ' <p > ' , ' ' )
content = re . sub ( r ' <!--.*?-- > ' , ' ' , content )
content = content . replace ( ' </ p> ' , ' ' )
content = content . replace ( ' <p> ' , ' ' )
content = content . replace ( ' <ol > ' , ' ' )
content = content . replace ( ' </p > ' , ' ' )
content = content . replace ( ' </ ol> ' , ' ' )
content = content . replace ( ' <ol> ' , ' ' )
content = content . replace ( ' <u l> ' , ' ' )
content = content . replace ( ' </o l> ' , ' ' )
content = content . replace ( ' </ ul> ' , ' ' )
content = content . replace ( ' <ul> ' , ' ' )
content = content . replace ( ' <strong > ' , ' ' )
content = content . replace ( ' </ul > ' , ' ' )
content = content . replace ( ' </ strong> ' , ' ' )
content = content . replace ( ' <strong> ' , ' ' )
content = content . replace ( ' </li > ' , ' ' )
content = content . replace ( ' </strong > ' , ' ' )
content = content . replace ( ' <li> ' , '+ ' )
content = content . replace ( ' </ li> ' , ' ' )
content = content . replace ( ' </h3 > ' , ' ' )
content = content . replace ( ' <li > ' , '+ ' )
content = re . sub ( r ' <h2.*? > ' , ' ## ' , content )
content = content . replace ( ' </h3 > ' , ' ' )
content = re . sub ( r ' <h3 .*?> ' , ' ### ' , content )
content = re . sub ( r ' <h2 .*?> ' , ' ## ' , content )
content = re . sub ( r ' <h4 .*?> ' , ' #### ' , content )
content = re . sub ( r ' <h3 .*?> ' , ' ### ' , content )
for replace_item in replace_more :
content = re . sub ( r ' <h4.*?> ' , ' #### ' , content )
content = content . replace ( replace_item , ' ' )
for replace_item in replace_more :
for _ in range ( 100 ) :
content = content . replace ( replace_item , ' ' )
content = content . replace ( ' \n \n \n ' , ' \n \n ' )
for _ in range ( 100 ) :
content = content . replace ( ' \n \n \n ' , ' \n \n ' )
except :
print ( f ' 提示:字符串替换出现问题!出现问题的内容为: { content } ' )
else :
else :
pass
pass
markdown_content = f " # { title } \n { content } "
markdown_content = f " # { title } \n { content } "