update
This commit is contained in:
76
language_learning/2019.10.10_python_example/python_example.py
Executable file
76
language_learning/2019.10.10_python_example/python_example.py
Executable file
@@ -0,0 +1,76 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/417
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Basic Python operations: loops, conditionals, functions, file writing.
for i in range(5):  # loop (only `for` is shown; `while` works along the same lines)
    print('我是循环产生的数:', i)  # Python has no `end` keyword, so indentation is mandatory
    if i == 2:  # conditional
        print('判断:我是第三个数 2')
    else:
        pass  # `pass` is a no-op placeholder; an empty branch would be a syntax error
print()  # print an empty line
|
||||
|
||||
|
||||
def fun0(arg):
    """Print ``arg`` and return ``arg * 2``.

    Demonstrates defining a function with one parameter and a return value.
    """
    print('我是函数中的内容,参数值为:', arg)
    return arg*2  # return value
|
||||
|
||||
|
||||
print('函数返回值:', fun0(5), '\n')  # call the function; the '\n' argument adds a line break

# Classes are not demonstrated here: scientific computing is mostly
# procedural and rarely needs object orientation.

# Writing files.
# Method 1: a `with` block closes the file automatically.
# The encoding is given explicitly because the text is not ASCII.
with open('test1.txt', 'w', encoding='utf-8') as f1:  # 'w' overwrites, 'a' appends
    f1.write(str(100)+'\n这是第一种方式写入文件')  # str() converts a value to a string
# Method 2: open and close the file by hand; `finally` guarantees the close.
f2 = open('test2.txt', 'w', encoding='utf-8')  # open the file
try:
    f2.write(str(200)+'\n这是第二种方式写入文件')  # write to the file
finally:
    f2.close()  # close the file


# Frequently used NumPy calls.
print('零矩阵:\n', np.zeros((2, 3)))  # np.zeros() takes a tuple, hence the double parentheses
print('单位矩阵:\n', np.identity(3))  # 3x3 identity matrix; np.eye() also works
print('把一维数组按对角矩阵排列:\n', np.diag([1, 3, 5]), '\n')

print('指定步长的等差数列:\n', np.arange(1, 5, .5))  # half-open interval [1, 5)
print('指定个数的等差数列:\n', np.linspace(-2, 2, 5), '\n')  # closed interval [-2, 2], 5 points

print('随机数:\n', np.random.uniform(-2, 2))  # half-open interval [-2, 2)
print('随机整数:\n', np.random.randint(-10, 10), '\n')  # half-open interval [-10, 10): 10 is never drawn

print('数组从小到大排列:\n', np.sort([1, 7, 0, 3]))
print('数组从小到大排列对应的索引:\n', np.argsort([1, 7, 0, 3]), '\n')  # indices start at 0 in Python

matrix0 = np.array([[1, 2+9j, 3], [2, 5, 7]])
print('矩阵0:\n', matrix0)
print('矩阵的维度:\n', matrix0.shape)  # shape of the matrix
print('矩阵的行数:\n', matrix0.shape[0])  # number of rows
print('矩阵的列数:\n', matrix0.shape[1])  # number of columns
print('矩阵转置:\n', matrix0.transpose())  # transpose
print('矩阵转置共轭:\n', matrix0.transpose().conj(), '\n')  # conjugate transpose

matrix1 = np.array([[3, 5], [2, 7]])  # a NumPy array
eigenvalue, eigenvector = np.linalg.eig(matrix1)  # eigenvalues and eigenvectors
print('矩阵1:\n', matrix1)
print('本征值:\n', eigenvalue)
print('本征向量:\n', eigenvector)  # each column is one eigenvector
print('逆矩阵:\n', np.linalg.inv(matrix1))
print('计算行列式:\n', np.linalg.det(matrix1), '\n')

matrix2 = np.array([[1, 2], [3, 4]])
print('矩阵2:\n', matrix2)
print('矩阵1和矩阵2相乘:\n', np.matmul(matrix1, matrix2), '\n')  # matrix product; np.dot() also works

a = np.array([1, 2])
print('数组a=', a)
b = np.array([3, 4])
print('数组b=', b)
print('增加元素:\n', np.append(a, b, axis=0))  # append elements
print('增加行:\n', np.append([a], [b], axis=0))  # append a row (column counts must match); np.vstack(([a], [b])) is equivalent
print('增加列:\n', np.append([a], [b], axis=1))  # append columns (row counts must match); np.column_stack(([a], [b])) is equivalent
|
39
language_learning/2019.10.11_0_tensorflow_example/tensorflow.py
Executable file
39
language_learning/2019.10.11_0_tensorflow_example/tensorflow.py
Executable file
@@ -0,0 +1,39 @@
|
||||
import tensorflow as tf # 导入tensorflow
|
||||
|
||||
# NOTE: this script uses the TensorFlow 1.x graph/session API
# (tf.Session, tf.placeholder, tf.global_variables_initializer).
greeting = tf.constant('Hello Google Tensorflow!')  # define a constant

# Method 1: open and close the session by hand.
sess = tf.Session()  # start a session
try:
    result = sess.run(greeting)  # evaluate `greeting` inside the session
    print(result)
finally:
    sess.close()  # close the session even if run() raised

# Method 2: let a `with` block close the session.
with tf.Session() as sess:
    print(sess.run(greeting))


# Example 1: constants and matrix arithmetic.
matrix1 = tf.constant([[1., 3.]])  # constant matrix 1
matrix2 = tf.constant([[2.], [2.]])  # constant matrix 2
product = tf.matmul(matrix1, matrix2)  # matrix product
linear = tf.add(product, tf.constant(2.))  # add a constant to the product
with tf.Session() as sess:
    print(sess.run(matrix1))  # evaluate and print
    print(sess.run(linear))  # evaluate and print
# Printing the tensor itself shows no result because nothing has been
# evaluated; the original author reports the output
# Tensor("Add:0", shape=(1, 1), dtype=float32).
print(linear)


# Example 2: variables, tf.Variable().
state = tf.Variable(3, name='counter')
init = tf.global_variables_initializer()  # required once any variable is defined
with tf.Session() as sess:
    sess.run(init)  # variables must be initialised before use
    print(sess.run(state))

# Example 3: placeholders, tf.placeholder(); values are supplied via feed_dict.
x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
y = x1 + x2
with tf.Session() as sess:
    print(sess.run(y, feed_dict={x1: 7, x2: 2}))
|
@@ -0,0 +1,77 @@
|
||||
import tensorflow as tf
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        inputs: tensor that is matrix-multiplied with the layer weights.
        in_size: number of rows of the weight matrix.
        out_size: number of columns of the weight matrix (and of the bias).
        activation_function: optional callable applied to the affine
            output; when ``None`` the affine output is returned as is.

    Returns:
        ``activation_function(inputs @ W + b)``, or ``inputs @ W + b`` when
        no activation is given.
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))  # random-normal initial weights
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)  # biases start at 0.1
    wx_plus_b = tf.matmul(inputs, weights) + biases  # affine score
    if activation_function is None:
        return wx_plus_b
    return activation_function(wx_plus_b)
|
||||
|
||||
|
||||
# Generate the training data.
x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]  # network input; [:, np.newaxis] adds an axis -> column matrix
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)  # noise
y_data = np.square(x_data) - 0.5 + noise  # target: x^2 - 0.5 plus noise
print(x_data.shape)  # inspect the shapes
print(noise.shape)
print(y_data.shape)
print()


# Build the network (TensorFlow 1.x graph API).
xs = tf.placeholder(tf.float32, [None, 1])  # input placeholder; None accepts any number of samples
ys = tf.placeholder(tf.float32, [None, 1])  # target placeholder
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)  # hidden layer
prediction = add_layer(l1, 10, 1, activation_function=None)  # output layer
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))  # loss function
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)  # gradient descent
init = tf.global_variables_initializer()  # variable initialiser

# Scatter-plot the raw data.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()  # interactive mode on
plt.show()

# Train the network.
sess = tf.Session()
sess.run(init)
lines = None  # handle of the most recently drawn prediction curve
for i in range(1000):  # 1000 gradient-descent steps
    sess.run(train_step, feed_dict={xs: x_data, ys: y_data})
    if i % 50 == 0:  # refresh the plot every 50 steps
        if lines is not None:
            # Remove the previous curve through the artist itself rather
            # than mutating ax.lines inside a swallow-all try/except.
            lines[0].remove()
        prediction_value = sess.run(prediction, feed_dict={xs: x_data})  # network prediction
        print('loss=', sess.run(loss, feed_dict={xs: x_data, ys: y_data}))  # watch the loss decrease
        print('prediction=', sess.run(prediction, feed_dict={xs: [x_data[0, :]]}))  # prediction for the first sample
        print()
        lines = ax.plot(x_data, prediction_value, 'r-', lw=5)  # draw the prediction as a line
        plt.pause(.1)  # pause so the animation is visible
plt.ioff()  # interactive mode off so the final figure stays open
lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
plt.show()


# Save the trained model with tf.train.Saver().
saver = tf.train.Saver()
save_path = saver.save(sess, "my_net/save_net.ckpt")
print("Save to path: ", save_path)
print()
sess.close()


# Restore the model and predict a value.
with tf.Session() as sess2:
    saver.restore(sess2, "my_net/save_net.ckpt")  # load all saved variables
    print(y_data[0, :])  # original target value
    print(sess2.run(prediction, feed_dict={xs: [x_data[0, :]]}))  # predicted value
|
||||
|
359
language_learning/2019.10.27_0_ball_games_with_pygame/ball_games_with_pygame.py
Executable file
359
language_learning/2019.10.27_0_ball_games_with_pygame/ball_games_with_pygame.py
Executable file
@@ -0,0 +1,359 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/703
|
||||
"""
|
||||
|
||||
import pygame
|
||||
import random
|
||||
import math
|
||||
import numpy as np
|
||||
|
||||
# Game parameters.
screen_width = 1500  # screen width
screen_height = 900  # screen height
map_width = screen_width*4  # map size
map_height = screen_height*4  # map size
number_enemy = map_width*map_height/500000  # target number of enemies
number_dots = map_width * map_height / 50  # target number of dots
max_show_size = 100  # largest on-screen radius; beyond it the map is scaled down instead

my_value = 1000  # player's initial value
enemy_value_low = 500  # lowest initial enemy value
enemy_value_high = 1500  # highest initial enemy value
dot_value = 30  # value of one dot (food)
my_speed = 10  # player's normal speed
speed_up = 20  # player's speed while the mouse button is held
speed_enemy = 10  # enemy's normal speed
speed_enemy_anomaly = 20  # enemy's speed during a burst
anomaly_pro = 0.5  # probability that an enemy uses the burst speed
change_pro = 0.05  # probability that an enemy changes course (about once per 1/change_pro frames)
eat_percent = 0.9  # fraction of an eaten ball's value that is absorbed (1 means 100%)
loss = 0.001  # proportional value decay; scales with value (factor 1 at a value of 100000)
enemy_bigger_pro = 0.0005  # probability that an enemy receives a value bonus
enemy_bigger_rate = 0.1  # bonus size as a fraction of the player's value
|
||||
|
||||
|
||||
class Color(object):
    """Colour helpers."""

    @classmethod  # callable on the class itself, no instance needed
    def random_color(cls):
        """Return a random ``(red, green, blue)`` tuple, each channel in 0..255."""
        red = random.randint(0, 255)
        green = random.randint(0, 255)
        blue = random.randint(0, 255)
        return red, green, blue
|
||||
|
||||
|
||||
class Ball(object):
    """Base class holding the state shared by every ball."""

    def __init__(self, x, y, sx, sy, color, value):
        """Store position, velocity, colour and value; the ball starts alive."""
        self.x = x  # position on the map
        self.y = y
        self.sx = sx  # velocity components
        self.sy = sy
        self.color = color
        self.value = value  # the ball's size value (not the displayed size)
        self.is_alive = True
|
||||
|
||||
|
||||
class My_Ball(Ball):
    """The player's ball, always drawn at the centre of the screen."""

    def __init__(self, x, y, sx, sy, color, value):
        # A subclass that overrides __init__ must call the parent's
        # __init__ explicitly; super() does that.
        super().__init__(x, y, sx, sy, color, value)
        self._update_radius()
        self.position_x = int(screen_width/2)  # on-screen position: fixed at the centre
        self.position_y = int(screen_height/2)

    def _update_radius(self):
        """Recompute ``radius`` from ``value`` and cap ``show_radius`` at ``max_show_size``."""
        self.radius = int(self.value ** 0.5)  # radius (the factor pi is ignored)
        self.show_radius = min(self.radius, max_show_size)

    def draw(self, window):
        """Refresh the displayed size and draw the ball on ``window``."""
        self._update_radius()  # value changes between frames
        self.position_x = int(screen_width / 2)
        self.position_y = int(screen_height / 2)
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)

    def eat_ball(self, other):
        """Eat ``other`` (a dot or an enemy) if it is close enough and smaller.

        ``other`` is eaten when both balls are alive, its screen position is
        within ``show_radius`` of this ball, and this ball is displayed
        larger (ties are broken by ``value``).
        """
        if self is other or not (self.is_alive and other.is_alive):
            return
        distance = ((self.position_x - other.position_x) ** 2
                    + (self.position_y - other.position_y) ** 2) ** 0.5
        bigger = (self.show_radius > other.show_radius
                  or (self.show_radius == other.show_radius and self.value > other.value))
        if distance < self.show_radius and bigger:
            other.is_alive = False  # the other ball is eaten
            self.value += other.value*eat_percent  # absorb part of its value
            self._update_radius()

    def move(self):
        """Advance by the current velocity and clamp the position to the map."""
        self.x += self.sx
        self.y += self.sy
        self.x = min(max(self.x, 0), map_width)  # horizontal bounds
        self.y = min(max(self.y, 0), map_height)  # vertical bounds
|
||||
|
||||
|
||||
class Enemy_Ball(Ball):
    """An enemy ball, displayed relative to the player's ball (``host_ball``)."""

    def __init__(self, x, y, sx, sy, color, value, host_ball):
        super().__init__(x, y, sx, sy, color, value)
        self.host_ball = host_ball  # the player's ball
        self._update_display()

    def _update_display(self):
        """Recompute ``radius``, ``show_radius`` and the on-screen position."""
        self.radius = int(self.value ** 0.5)
        host = self.host_ball
        if host.radius >= max_show_size:
            # The player exceeds the display cap, so everything else is
            # scaled down by the same ratio.
            scale = host.radius / max_show_size
            self.show_radius = max(10, int(self.radius/scale))  # never smaller than 10
            self.position_x = int((self.x - host.x) / scale) + int(screen_width / 2)
            self.position_y = int((self.y - host.y) / scale) + int(screen_height / 2)
        else:
            self.show_radius = self.radius  # unscaled
            self.position_x = (self.x - host.x) + int(screen_width / 2)
            self.position_y = (self.y - host.y) + int(screen_height / 2)

    def draw(self, window):
        """Refresh the display geometry and draw the ball on ``window``."""
        self._update_display()  # depends on the player's current size and position
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)

    def eat_ball(self, other):
        """Eat ``other`` if it is alive, within ``show_radius`` and displayed smaller.

        Ties in ``show_radius`` are broken by ``value``.
        """
        if self is other or not (self.is_alive and other.is_alive):
            return
        distance = ((self.position_x - other.position_x) ** 2
                    + (self.position_y - other.position_y) ** 2) ** 0.5
        bigger = (self.show_radius > other.show_radius
                  or (self.show_radius == other.show_radius and self.value > other.value))
        if distance < self.show_radius and bigger:
            other.is_alive = False  # the other ball is eaten
            self.value += other.value*eat_percent
            self.radius = int(self.value ** 0.5)

    def move(self):
        """Advance by the current velocity and bounce off the map borders."""
        self.x += self.sx
        self.y += self.sy
        # Horizontal borders.
        if self.x < 0:
            self.sx = -self.sx
            self.x = 0
        if self.x > map_width:
            self.sx = -self.sx
            self.x = map_width
        # Vertical borders.
        if self.y <= 0:
            self.sy = -self.sy
            self.y = 0
        if self.y >= map_height:
            self.sy = -self.sy
            self.y = map_height
|
||||
|
||||
|
||||
class Dot_Ball(Ball):
    """A stationary food dot, displayed relative to the player's ball."""

    def __init__(self, x, y, sx, sy, color, value, host_ball):
        super().__init__(x, y, sx, sy, color, value)
        self.host_ball = host_ball  # the player's ball
        self.radius = 8  # fixed dot radius
        self._update_display()

    def _update_display(self):
        """Recompute ``show_radius`` and the on-screen position."""
        host = self.host_ball
        if host.radius >= max_show_size:
            # Scale down by the same ratio as the capped player ball.
            scale = host.radius / max_show_size
            self.show_radius = max(3, int(self.radius/scale))  # never smaller than 3
            self.position_x = int((self.x - host.x) / scale) + int(screen_width / 2)
            self.position_y = int((self.y - host.y) / scale) + int(screen_height / 2)
        else:
            self.show_radius = self.radius
            self.position_x = (self.x - host.x) + int(screen_width / 2)
            self.position_y = (self.y - host.y) + int(screen_height / 2)

    def draw(self, window):
        """Refresh the display geometry and draw the dot on ``window``."""
        self._update_display()  # depends on the player's current size and position
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)
|
||||
|
||||
|
||||
def creat_my_ball():
    """Create the player's ball: white, at rest, at a random map position."""
    x = random.randint(0, map_width)  # random position on the map
    y = random.randint(0, map_height)
    white = 255, 255, 255
    return My_Ball(x, y, 0, 0, white, my_value)  # zero initial velocity
|
||||
|
||||
|
||||
def auto_creat_ball(balls, host_ball):
    """Append one new enemy to ``balls`` unless there are already more than ``number_enemy``.

    The enemy gets a random position, value, colour and a velocity whose
    components are chosen so that sx**2 + sy**2 is about speed_enemy**2.
    """
    if len(balls) > number_enemy:  # enough enemies already
        return
    x = random.randint(0, map_width)  # random position on the map
    y = random.randint(0, map_height)
    value = random.randint(enemy_value_low, enemy_value_high)  # initial value
    sx = random.randint(-speed_enemy, speed_enemy)
    sy = int((speed_enemy**2 - sx**2) ** 0.5)
    if random.randint(0, 1) != 0:  # random vertical direction
        sy = -sy
    color = Color.random_color()
    balls.append(Enemy_Ball(x, y, sx, sy, color, value, host_ball))
|
||||
|
||||
|
||||
def auto_creat_dots(dots, host_ball):
    """Append one new stationary dot to ``dots`` unless there are already more than ``number_dots``."""
    if len(dots) > number_dots:  # enough dots already
        return
    x = random.randint(0, map_width)  # random position on the map
    y = random.randint(0, map_height)
    color = Color.random_color()
    dots.append(Dot_Ball(x, y, 0, 0, color, dot_value, host_ball))  # dots do not move
|
||||
|
||||
|
||||
def control_my_ball(host_ball):
    """Move the player's ball, apply value decay, and steer it from mouse events.

    The ball heads towards the mouse position relative to the screen
    centre; holding the left button uses ``speed_up`` instead of
    ``my_speed``. A pointer within 30 pixels of the centre on both axes,
    or any non-mouse event, stops the ball.
    """
    host_ball.move()
    host_ball.value = host_ball.value*(1-loss*host_ball.value/100000)
    center_x = screen_width/2
    center_y = screen_height/2
    for event in pygame.event.get():  # poll pending events
        if event.type == pygame.QUIT:
            # This poll drains the queue, so hand the quit request back for
            # the main loop; otherwise closing the window can be ignored.
            pygame.event.post(event)
            continue
        if event.type == pygame.MOUSEBUTTONDOWN:
            pos = event.pos
            speed = speed_up
        elif event.type == pygame.MOUSEMOTION:
            pos = event.pos
            speed = speed_up if event.buttons[0] == 1 else my_speed
        elif event.type == pygame.MOUSEBUTTONUP:
            pos = event.pos
            speed = my_speed
        else:
            pos = [center_x, center_y]  # any other event stops the ball
            speed = my_speed
        dx = pos[0] - center_x
        dy = pos[1] - center_y
        if abs(dx) < 30 and abs(dy) < 30:
            host_ball.sx = 0
            host_ball.sy = 0
        else:
            # Scale the direction vector to `speed`. This covers all four
            # quadrants and the vertical case (dx == 0) without special
            # cases; the distance is at least 30 here, so it is never zero.
            distance = math.hypot(dx, dy)
            host_ball.sx = int(speed * dx / distance)
            host_ball.sy = int(speed * dy / distance)
|
||||
|
||||
|
||||
def enemy_move(balls, host_ball):
    """Move every enemy, apply value decay, and randomly boost or re-steer it.

    Per enemy and frame: with probability about ``enemy_bigger_pro`` its
    value grows by ``enemy_bigger_rate`` times the player's value; with
    probability about ``change_pro`` it picks a new velocity at either the
    normal or the burst speed.
    """
    for enemy in balls:
        enemy.move()
        enemy.value = enemy.value*(1-loss*enemy.value/100000)
        if random.randint(1, int(1/enemy_bigger_pro)) == 1:
            enemy.value += host_ball.value*enemy_bigger_rate
        if random.randint(1, int(1/anomaly_pro)) == 1:
            speed_enemy0 = speed_enemy_anomaly  # burst speed
        else:
            speed_enemy0 = speed_enemy  # normal speed
        if random.randint(1, int(1/change_pro)) == 1:  # change course
            enemy.sx = random.randint(-speed_enemy0, speed_enemy0)
            vertical = int((speed_enemy0 ** 2 - enemy.sx ** 2) ** 0.5)
            enemy.sy = vertical if random.randint(0, 1) == 0 else -vertical
|
||||
|
||||
|
||||
def eat_each_other(host_ball, balls, dots):
    """Run every eating interaction for one frame by calling ``eat_ball``.

    Order: each enemy against all enemies and then all dots; then, per
    enemy, the player against it and it against the player; finally the
    player against all dots.
    """
    for enemy in balls:
        for enemy2 in balls:
            enemy.eat_ball(enemy2)  # enemies eat each other
        for food in dots:
            enemy.eat_ball(food)  # enemies eat dots
    for enemy in balls:
        host_ball.eat_ball(enemy)  # player eats enemy
        enemy.eat_ball(host_ball)  # enemy eats player
    for food in dots:
        host_ball.eat_ball(food)  # player eats dots
|
||||
|
||||
|
||||
def paint(host_ball, balls, dots, screen):
    """Clear ``screen``, draw every living ball, and drop dead ones from the lists.

    ``balls`` and ``dots`` are pruned in place so callers keep their list
    objects. Pruning happens after iteration: removing items from a list
    while looping over it skips the element that follows each removal.
    """
    screen.fill((0, 0, 0))  # paint the background black
    if host_ball.is_alive:
        host_ball.draw(screen)
    for group in (balls, dots):
        for ball in group:
            if ball.is_alive:
                ball.draw(screen)
        group[:] = [ball for ball in group if ball.is_alive]
|
||||
|
||||
|
||||
def main():
    """Run the game loop until the window is closed."""
    pygame.init()
    screen = pygame.display.set_mode((screen_width, screen_height))  # create the window
    pygame.display.set_caption("球球大作战")  # window title
    balls = []  # all enemy balls
    dots = []  # all dots
    host_ball = creat_my_ball()  # the player's ball
    frame = 0  # frame counter for the periodic value printout
    is_running = True
    while is_running:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                is_running = False
        auto_creat_dots(dots, host_ball)  # spawn a dot if needed
        auto_creat_ball(balls, host_ball)  # spawn an enemy if needed
        paint(host_ball, balls, dots, screen)  # draw everything
        pygame.display.flip()  # present the frame
        pygame.time.delay(30)  # frame delay in milliseconds

        control_my_ball(host_ball)  # move the player
        enemy_move(balls, host_ball)  # move the enemies
        eat_each_other(host_ball, balls, dots)  # resolve eating
        frame += 1
        if frame % 50 == 0:
            print(host_ball.value)
    pygame.quit()  # shut pygame down once the loop ends


if __name__ == '__main__':
    main()
|
@@ -0,0 +1,106 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/706
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import time
|
||||
import matplotlib.pyplot as plt
|
||||
import tushare as ts
|
||||
|
||||
|
||||
def main():
    """Back-test the "sell at +5%, buy back at -5%" strategy on one stock and plot the trades.

    Data comes from the tushare pro API, which needs a personal token.
    """
    start_clock = time.perf_counter()
    pro = ts.pro_api('到官网上注册,寻找Token填在这里!')  # replace with your own tushare token
    print('\n我的策略:见好就收,遇低抄底。\n'
          ' 【卖出】买入后涨了5%就卖出\n'
          ' 【买入】卖出后跌了5%就买入\n'
          '注:第一天必须买进,最后一天前必须卖出(为了与不操作的做对比)\n')
    number = 1
    for _ in range(number):
        data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')  # list of all stocks
        # print(data.columns)  # column headers
        # print(data)  # the full stock table
        row = 1  # second row; per the original author this is the "万科A" stock
        ts_code = data.values[row, 0]  # stock code
        stock = data.values[row, 2]  # stock name
        industry = data.values[row, 4]  # industry
        start_date = '20110101'
        end_date = '20191027'
        df = pro.daily(ts_code=ts_code, start_date=start_date, end_date=end_date)  # daily bars
        # print(df.columns)
        # print(df)
        # Columns 5 and 8 are taken to be close price and percent change
        # (per the original comments; TODO confirm against df.columns).
        # Both are reversed into chronological order, oldest first.
        close = np.array(list(reversed(df.values[:, 5])))
        pct_chg = np.array(list(reversed(df.values[:, 8])))
        # print(df.columns[5], '=', close, '\n')
        # print(df.columns[8], '=', pct_chg, '\n')
        # Returns: strategy profit, buy-and-hold profit, number of round
        # trips, invested amount, buy days, sell days.
        profit, profit_no_operation, times, invest_money, buy_time_all, sell_time_all = back_test(close.shape[0], close, pct_chg)
        print('\n------股票:', stock, ts_code, industry, '[买入市值=%7.2f' % invest_money, ']------')
        print('回测时间段:', start_date, '-', end_date)
        print('操作后利润= %6.2f' % profit, ' 买入(卖出)次数=', times, ' ')
        print('不操作利润= %6.2f' % profit_no_operation, '(第一天买入,最后一天卖出,中间未操作)')
        end_clock = time.perf_counter()
        print('CPU执行时间=', end_clock - start_clock, 's')  # elapsed perf_counter time
        plt.figure(1)
        plt.title('Stock Code: '+ts_code+' (red point: buy, green point: sell)')
        plt.grid()
        plt.plot(range(close.shape[0]), close, '-')
        for day in buy_time_all:
            plt.plot(day, close[int(day)], 'or', markersize=13)  # red: buy
        for day in sell_time_all:
            plt.plot(day, close[int(day)], 'dg', markersize=13)  # green: sell
        plt.show()
|
||||
|
||||
|
||||
def back_test(days, close, pct_chg, money_in=10000, sell_rise_pct=5, buy_drop_pct=5):
    """Back-test a "take profit, buy the dip" strategy on a closing-price series.

    Rules: buy on day 0; while holding, sell once the close is more than
    ``sell_rise_pct`` percent above the last buy price; while out, buy once
    the close is more than ``buy_drop_pct`` percent below the last sell
    price; a position still open after the last day is sold on that day.

    Args:
        days: number of days to simulate (indices 0..days-1 of ``close``).
        close: closing prices in chronological order.
        pct_chg: daily percent changes; accepted for interface
            compatibility but not used.
        money_in: cash budget; converted to a whole number of shares at
            ``close[0]``.
        sell_rise_pct: percent gain that triggers a sale.
        buy_drop_pct: percent drop that triggers a purchase.

    Returns:
        ``(total_profit, profit_no_operation, times, invest_money,
        buy_time_all, sell_time_all)``: strategy profit, buy-and-hold
        profit, number of completed round trips, amount actually invested,
        and float arrays of buy and sell day indices.
    """
    shares = int(money_in/close[0])  # budget converted to a share count
    invest_money = close[0]*shares  # amount actually spent
    profit_no_operation = (close[-1]-close[0])*shares  # buy-and-hold profit
    holding = False
    total_profit = 0
    times = 0
    buy_time = None
    sell_time = None
    buy_times = []
    sell_times = []
    for i in range(days):
        if i == 0:
            # Day 0 always buys so the result is comparable to buy-and-hold.
            buy_time = i
            buy_times.append(buy_time)
            holding = True
            print('------------------第', buy_time, '天买进-------------')
        elif holding:
            change = (close[i]-close[buy_time])/close[buy_time]*100  # move since the buy
            if change > sell_rise_pct:
                sell_time = i
                sell_times.append(sell_time)
                print('----------第', sell_time, '天卖出,持有天数:', sell_time-buy_time, '--------------\n')
                holding = False
                times = times + 1
                total_profit = total_profit + (close[sell_time]-close[buy_time])*shares
        else:
            change = (close[i]-close[sell_time])/close[sell_time]*100  # move since the sale
            if change < -buy_drop_pct:
                buy_time = i
                buy_times.append(buy_time)
                print('------------------第', buy_time, '天买进-------------')
                holding = True
    if holding:
        # Forced sale on the last day. The profit is per share and must be
        # scaled by the share count like every other sale; the holding
        # period runs from the last buy to the last day.
        last = days - 1
        total_profit = total_profit + (close[last]-close[buy_time])*shares
        times = times + 1
        print('--------------第', last, '天(最后一天)卖出,持有天数:', last-buy_time, '--------------\n')
        sell_times.append(last)
    buy_time_all = np.array(buy_times, dtype=float)
    sell_time_all = np.array(sell_times, dtype=float)
    return total_profit, profit_no_operation, times, invest_money, buy_time_all, sell_time_all
|
||||
|
||||
|
||||
if __name__ == '__main__':  # run only when executed as a script
    main()
|
41
language_learning/2019.10.29_matlab_example/matlab_example.m
Executable file
41
language_learning/2019.10.29_matlab_example/matlab_example.m
Executable file
@@ -0,0 +1,41 @@
|
||||
% This code is supported by the website: https://www.guanjihuan.com
% The newest version of this code is on the web page: https://www.guanjihuan.com/archives/766

% In MATLAB a percent sign "%" starts a comment.
% Shortcuts: Ctrl+R comments the selection, Ctrl+T uncomments it.
clc;  % clears the command window
clear all;  % clears all variables
clf;  % clears the current figure; useful before plotting

aa=1  % no trailing semicolon: the value is printed
bb=2;  % trailing semicolon: the value is not printed
cc1=zeros(2,3)  % zero matrix
cc2=eye(3,3)  % identity matrix

% Matrix products
matrix1=[3,3;3,3]  % a semicolon inside the brackets starts a new row; indices start at 1
matrix2=[2,0;0,2]
matrix_product_1=matrix1*matrix2  % * is the ordinary matrix product
matrix_product_2=matrix1.*matrix2  % .* multiplies element by element

% Loops
for i0=1:0.5:2  % the loop body runs from for to end; a:b:c starts at a, ends at c, with step b
    for_result=i0+1i  % i denotes the imaginary unit, so avoid i as a variable name; no semicolon, so the value is printed
end

% Conditionals
if aa~=1  % ~= means not equal, == means equal
    dd=100
else
    dd=300
end

matrix=[2,3;5,7]
% Eigenvectors and eigenvalues
[V,D]=eig(matrix)  % the columns of V are the eigenvectors; the diagonal of D holds the matching eigenvalues
% Inverse
inv1=inv(matrix)  % matrix inverse
inv2=matrix^-1  % the inverse can also be written this way
% Plotting
plot([0:20],[10:-1:-10],'-o')  % see the official documentation for more plotting options
|
164
language_learning/2019.10.31_0_fortran_example/Console1.f90
Executable file
164
language_learning/2019.10.31_0_fortran_example/Console1.f90
Executable file
@@ -0,0 +1,164 @@
|
||||
! This code is supported by the website: https://www.guanjihuan.com
|
||||
! The newest version of this code is on the web page: https://www.guanjihuan.com/archives/762
|
||||
|
||||
|
||||
module global   ! A module bundles related variables and procedures; any unit that needs them brings them in with "use".
    implicit none
    double precision sqrt3,Pi
    parameter(sqrt3=1.7320508075688773d0,Pi=3.14159265358979324d0)   ! parameter declares constants that cannot be changed
end module global
|
||||
|
||||
|
||||
program main
    ! Tour of basic Fortran: list-directed and formatted output, writing a file,
    ! dense linear algebra through the generic gemm/getrf/getri/geev interfaces,
    ! loops, branches, and calling a subroutine and a function.
    ! A main program starts with "program" and ends with "end program".
    ! Fortran is case-insensitive and "!" starts a comment.
    use global
    use f95_precision   ! the original author notes the program also runs without this line
    use blas95          ! provides gemm() for matrix products
    use lapack95        ! provides getrf()/getri() for inversion and geev() for eigenvalues/eigenvectors
    implicit none       ! turn off implicit typing: every variable must be declared

    integer i, j, info, index1(2)
    double precision a(2,2), b(2,2), c(2,2), &   ! "&" continues a long statement on the next line
                     x1, x2, result_1, result_2, fun1
    complex*16 dd(2,2), eigenvalues(2)
    complex*16, allocatable :: eigenvectors(:,:)   ! "::" is mandatory once attributes are listed
    character(len=15) hello, number                ! without len= only one character is reserved
    allocate(eigenvectors(2,2))                    ! reserve storage for the allocatable array

    write(*,*) '----输出----'
    hello = 'hello world'
    write(*,*) hello   ! first "*" = unit (screen), second "*" = default format
    write(number,'(f7.3)') pi   ! internal write: converts a number into a string
    write(*,*) '数字转成字符串后再输出:', number
    ! advance="no" suppresses the newline and requires an explicit format.
    write(*,"(a,18x)",advance="no") hello
    write(*,*) number,'这是不换行输出测试'
    ! Literal text may live inside the format; each "/" emits one extra newline.
    write(*,"('一些固定文字也可以写在这里面', a, a,//)") hello, number

    write(*,*) '----写入文件----'
    open(unit=10,file='learn-fortran-test.txt')
    write(10,*) hello, number
    close(10)
    write(*,*) ''

    write(*,*) '----矩阵乘积----'
    a(1,1)=2; a(1,2)=5; a(2,1)=3; a(2,2)=2   ! ";" separates statements on one line
    ! Zero the whole matrix first: only the diagonal is assigned below, and the
    ! off-diagonal entries would otherwise be undefined when printed and multiplied.
    b = 0.d0
    b(1,1)=3; b(2,2)=3
    write(*,*) '矩阵直接默认输出,是按列的顺序一个个输出'
    write(*,*) 'a='
    write(*,*) a   ! list-directed output walks the array in column-major order
    write(*,*) '矩阵格式化输出'
    write(*,*) 'a='
    do i=1,2
        do j=1,2   ! inner loop runs over the column index
            write(*,'(f10.4)',advance='no') a(i,j)
        enddo
        write(*,*) ''
    enddo
    write(*,*) 'b='
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') b(i,j)
        enddo
        write(*,*) ''
    enddo
    call gemm(a,b,c)   ! c = a*b
    write(*,*) '矩阵乘积:c=a*b='
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') c(i,j)
        enddo
        write(*,*) ''
    enddo
    write(*,*) ''

    write(*,*) '----矩阵求逆----'
    ! getrf factorises and getri inverts from that factorisation; index1 carries
    ! the pivots from one call to the other. info = 0 means success.
    ! After these calls "a" holds the inverse, not the original matrix.
    call getrf(a,index1,info); call getri(a,index1,info)
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') a(i,j)
        enddo
        write(*,*) ''
    enddo

    write(*,*) '----复数矩阵----'
    dd(1,1)=(1.d0, 0.d0)
    dd(1,2)=(7.d0, 0.d0)
    dd(2,1)=(3.d0, 0.d0)
    dd(2,2)=(2.d0, 0.d0)
    do i=1,2
        do j=1,2
            write(*,"(f10.4, '+1i*',f7.4)",advance='no') dd(i,j)
        enddo
        write(*,*) ''
    enddo
    write(*,*) ''

    write(*,*) '----矩阵本征矢和本征值----'
    ! W receives the eigenvalues and VR the eigenvectors (stored as columns).
    ! Note that dd is overwritten by this call.
    call geev(A=dd, W=eigenvalues, VR=eigenvectors, INFO=info)
    write(*,*) 'eigenvectors:'
    do i=1,2
        do j=1,2
            write(*,"(f10.4, '+1i*',f7.4)",advance='no') eigenvectors(i,j)
        enddo
        write(*,*) ''
    enddo
    write(*,*) 'eigenvalues:'
    do i=1,2
        write(*,"(f10.4, '+1i*',f7.4)",advance='no') eigenvalues(i)
    enddo
    write(*,*) ''
    deallocate(eigenvectors)   ! release the allocatable array

    write(*,*) ''   ! prints an empty line
    write(*,*) '----循环加判断----'
    do i=1,5
        if (mod(i,2)==0) then
            write(*,*) '我是偶数', i
        else if (i==3) then
            write(*,*) '我是第3个数字,也是奇数'
        else
            write(*,*) '我是奇数', i
        endif
    enddo
    write(*,*) ''

    ! Literals must be double precision (2.d0) to match the dummy arguments.
    call sub1(2.d0, 3.d0, result_1, result_2)
    write(*,*) '调用子程序,求和:',result_1
    write(*,*) '调用子程序,乘积:',result_2
    write(*,*) '使用函数,返回减法结果:', fun1(2.d0, 3.d0)
    write(*,*) ''

end program
|
||||
|
||||
|
||||
|
||||
! Return the sum (y1) and the product (y2) of x1 and x2.
! A subroutine passes all inputs and outputs through its argument list and is
! invoked with "call".
subroutine sub1(x1,x2,y1,y2)
    implicit none
    ! intent(in) marks read-only inputs and intent(out) marks results;
    ! intent(inout) would allow both directions. The "::" is required here.
    double precision, intent(in) :: x1, x2
    double precision, intent(out) :: y1, y2
    y2 = x1*x2
    y1 = x1+x2
end subroutine sub1
|
||||
|
||||
|
||||
|
||||
! Return x1 - x2.
! A function yields a single value through the variable named after the
! function itself; use a subroutine when several results are needed.
function fun1(x1,x2)
    implicit none
    double precision :: fun1     ! the result variable must be declared too
    double precision :: x1, x2
    fun1 = x1 - x2
end function fun1
|
@@ -0,0 +1,78 @@
|
||||
! This code is supported by the website: https://www.guanjihuan.com
|
||||
! The newest version of this code is on the web page: https://www.guanjihuan.com/archives/764
|
||||
|
||||
|
||||
program hello_open_mp
    ! OpenMP demonstration: query thread information outside and inside a
    ! parallel region, then time the same workload as a parallel do and as a
    ! serial loop.
    use omp_lib   ! "include 'omp_lib.h'" is an equivalent alternative
    implicit none
    integer mcpu,tid,total,N,i,j,loop
    double precision starttime, endtime, time,result_0
    double precision, allocatable:: T(:)
    N=5   ! number of iterations of the parallelised do loop
    loop=1000000000   ! inner workload size; raise it to compare parallel and serial timings
    allocate(T(N))

    !call OMP_SET_NUM_THREADS(2)   ! uncomment to force the number of threads
    total=OMP_GET_NUM_PROCS()   ! number of processors available
    print '(a,i2)', '计算机处理器数量:' , total   ! write(*,'(a,i2)') works as well
    print '(a)', '-----在并行之前-----'
    tid=OMP_GET_THREAD_NUM()    ! id of the calling thread
    mcpu=OMP_GET_NUM_THREADS()  ! number of threads in the current team
    print '(a,i2,a,i2)', '当前线程号:',tid,';总的线程数:', mcpu
    print *   ! empty line

    print'(a)','-----第一部分程序开始并行-----'
    !$OMP PARALLEL DEFAULT(PRIVATE)
    tid=OMP_GET_THREAD_NUM()
    mcpu=OMP_GET_NUM_THREADS()
    print '(a,i2,a,i2)', '当前线程号:',tid,';总的线程数:', mcpu
    !$OMP END PARALLEL

    print *
    print'(a)','-----第二部分程序开始并行-----'
    starttime=OMP_GET_WTIME()   ! start of the timed section
    ! Everything is private by default; only the inputs and the result array are shared.
    !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(T,N,loop)
    do i=1,N   ! each iteration stands for one independent sample
        result_0=0
        tid=OMP_GET_THREAD_NUM()
        mcpu=OMP_GET_NUM_THREADS()
        do j=1,loop   ! stand-in for the real computation
            result_0 = result_0+1
        enddo
        ! Store the per-iteration result in the shared array, indexed by i so it
        ! can be matched to its parameter afterwards.
        T(i) = result_0-loop+i
        print '(a,i2, a, f10.4,a,i2,a,i2 )', 'T(',i,')=', T(i) , ' 来源于线程号',tid,';总的线程数:', mcpu
    enddo
    !$OMP END PARALLEL DO
    endtime=OMP_GET_WTIME()   ! end of the timed section
    time=endtime-starttime
    print '(a, f13.5)' , '第二部分程序按并行计算所用的时间:', time

    print *
    print'(a)','-----第二部分程序按串行的计算-----'
    starttime=OMP_GET_WTIME()
    do i=1,N
        result_0=0
        tid=OMP_GET_THREAD_NUM()
        mcpu=OMP_GET_NUM_THREADS()
        do j=1,loop
            result_0 = result_0+1
        enddo
        T(i) = result_0-loop+i
        print '(a,i2, a, f10.4,a,i2,a,i2 )', 'T(' ,i,')=', T(i) , ' 来源于线程号',tid,';总的线程数:', mcpu
    enddo
    endtime=OMP_GET_WTIME()
    time=endtime-starttime
    print '(a, f13.5)' , '第二部分程序按串行计算所用的时间:', time
    print *

    tid=OMP_GET_THREAD_NUM()
    mcpu=OMP_GET_NUM_THREADS()
    print '(a,i5,a,i5)', '当前线程号:',tid,';总的线程数:', mcpu
    print *
    deallocate(T)   ! release the result array allocated above
end program hello_open_mp   ! plain "end" or "end program" are accepted too
|
||||
|
33
language_learning/2019.11.16_markdown_example/markdown_example.md
Executable file
33
language_learning/2019.11.16_markdown_example/markdown_example.md
Executable file
@@ -0,0 +1,33 @@
|
||||
# 一级标题
|
||||
## 二级标题
|
||||
### 三级标题
|
||||
#### 四级标题
|
||||
|
||||
有序列表:数字加一个点
|
||||
1. 列表内容
|
||||
2. 列表内容
|
||||
3. 列表内容
|
||||
|
||||
无序列表:用 + - * 任何一种都可以(为了不和其他记号重复,个人倾向于用+)
|
||||
+ 列表内容
|
||||
+ 嵌套前面加几个空格(为了保险起见,个人倾向于用四个空格)
|
||||
+ 列表内容
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
+ 列表嵌套
|
||||
|
||||
分割线:三个或者三个以上的 - 或 *(为了不和其他记号重复,个人倾向于用---)
|
||||
|
||||
----
|
||||
|
||||
*倾斜:前后一个星号*
|
||||
|
||||
**加粗:前后两个星号**
|
||||
|
||||
***斜体加粗:前后三个星号***
|
||||
|
||||
| 表头 | 表头 | 表头 |
|
||||
| --- | --- | --- |
|
||||
| 内容 | 内容 | 内容 |
|
||||
| 内容 | 内容 | 内容 |
|
||||
| 内容 | 内容 | 内容 |
|
6
language_learning/2019.12.04_latex_example/basic_structure.tex
Executable file
6
language_learning/2019.12.04_latex_example/basic_structure.tex
Executable file
@@ -0,0 +1,6 @@
|
||||
\documentclass{article} %文档类声明
|
||||
%导言区(文档类声明和正文间的是导言区)
|
||||
\begin{document} %正文开始
|
||||
Hello, world! %正文
|
||||
\end{document} %正文结束
|
||||
\end{document}
|
18
language_learning/2019.12.04_latex_example/simple_example.tex
Executable file
18
language_learning/2019.12.04_latex_example/simple_example.tex
Executable file
@@ -0,0 +1,18 @@
|
||||
\documentclass{article} %文档类声明
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
|
||||
|
||||
\begin{document} %正文
|
||||
\title{这是一个标题} %标题
|
||||
\author{作者名字} %作者
|
||||
\date{} %\maketitle默认会加上当前时间,用\date{},空着内容可以取消时间的显示
|
||||
\maketitle %加了这个,标题、作者等信息才会显示
|
||||
\section{节}
|
||||
\subsection{小节}
|
||||
\subsubsection{子小节}
|
||||
Hello, world! %下面空一行代表换行
|
||||
\textbf{用\textbackslash textbf\{\}可以加粗文本} %\textbf{}可以加粗文本
|
||||
\section{节}
|
||||
\part{}
|
||||
\part{}
|
||||
\end{document}
|
BIN
language_learning/2019.12.05_beamer_as_slides/1.jpg
Executable file
BIN
language_learning/2019.12.05_beamer_as_slides/1.jpg
Executable file
Binary file not shown.
After Width: | Height: | Size: 18 KiB |
50
language_learning/2019.12.05_beamer_as_slides/beamer.tex
Executable file
50
language_learning/2019.12.05_beamer_as_slides/beamer.tex
Executable file
@@ -0,0 +1,50 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
|
||||
\usepackage{graphicx} %这个包提供\includegraphics命令来插入图片
|
||||
|
||||
\usetheme{Boadilla} %主题
|
||||
\usecolortheme{default} %主题的颜色
|
||||
|
||||
\title{这是PPT标题} %标题
|
||||
\author{作者名字\inst{1},作者名字\inst{2}} %作者
|
||||
\institute{\inst{1}第一个单位\and\inst{2}第二个单位} %这里的\and有换行的效果
|
||||
\date{\today} %时间(默认也会显示)
|
||||
\logo{\includegraphics[height=1.0cm]{1.jpg}}  %右下角的小logo
|
||||
|
||||
\begin{document} %正文开始
|
||||
\begin{frame} %相当于ppt里的一页
|
||||
\titlepage %标题页
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{目录} %当前页的标题
|
||||
\tableofcontents %制作目录,需要\section{}配合
|
||||
\end{frame}
|
||||
|
||||
\section{第一节} %用来做目录
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题1}
|
||||
这是第一节第一页的内容。This is a text in the first frame. This is a text in the first frame. This is a text in the first frame.
|
||||
\end{frame}
|
||||
|
||||
\section{第二节}
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题2}
|
||||
这是第二节第一页的内容。这里使用了\textbackslash pause。\pause This is a text in the second frame. This is a text in the second frame. This is a text in the second frame. %\pause是暂停,前后会分成两页。
|
||||
\end{frame}
|
||||
\begin{frame}
|
||||
\frametitle{当前页的标题3:Two-column slide}
|
||||
\begin{columns} %分成列
|
||||
\column{0.5\textwidth} %占用一半
|
||||
This is a text in first column.
|
||||
$$E=mc^2$$
|
||||
\begin{itemize} %制作列表
|
||||
\item First item
|
||||
\item Second item
|
||||
\end{itemize}
|
||||
\column{0.5\textwidth} %占用一半
|
||||
This text will be in the second column
|
||||
and on a second thought this is a nice looking
|
||||
layout in some cases.
|
||||
\end{columns}
|
||||
\end{frame}
|
||||
\end{document}
|
@@ -0,0 +1,41 @@
|
||||
from multiprocessing import Process
|
||||
import os
|
||||
import time
|
||||
|
||||
def run_proc(name, iterations=300000000):
    """Burn CPU in a dummy loop, then print the process id and elapsed time.

    Args:
        name: Label of the job; echoed in the printed report.
        iterations: Number of loop iterations to execute. The default keeps
            the original benchmark size; pass a small value for a quick run.
    """
    start_time = time.perf_counter()
    for _ in range(iterations):
        # `^` is bitwise XOR in Python (not a power); it is only busy-work here.
        x = 100000 ^ 1000000000000
    end_time = time.perf_counter()
    print('Process id running on %s = %s' % (name, os.getpid()), '; running time = %s' % (end_time-start_time))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    job_names = ('job1', 'job2', 'job3', 'job4')

    # Serial: run the jobs one after another in this process.
    print('串行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    for job_name in job_names:
        run_proc(job_name)
    end_time = time.perf_counter()
    print('CPU执行时间(s)=', (end_time-start_time), '\n')

    # Parallel: one child process per job.
    print('并行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    processes = [Process(target=run_proc, args=(job_name,)) for job_name in job_names]
    for p in processes:
        p.start()
    # Wait for every child, not only the last one started; otherwise the timer
    # can stop while earlier jobs are still running.
    for p in processes:
        p.join()
    end_time = time.perf_counter()
    print('CPU执行时间(s)=', (end_time-start_time))
|
||||
|
@@ -0,0 +1,47 @@
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.request import urlopen
|
||||
|
||||
# --- Simplest case: a page with basic structure ---
basic_url = "https://mofanpy.com/static/scraping/basic-structure.html"
html = urlopen(basic_url).read().decode('utf-8')
print('\n显示网页的代码信息1:\n\n ----------------开始----------------\n', html, '\n\n----------------结束----------------')  # dump the raw page source

soup = BeautifulSoup(html, features='lxml')  # hand the page to BeautifulSoup
print('\n获取标签_标题h1_中的内容soup.h1:\n', soup.h1)
print('\n获取标签_段落p_中的内容soup.p:\n', soup.p)
print('\n获取标签_链接a_中的内容soup.a:\n', soup.a)

all_href = soup.find_all('a')
print('\n获取所有"a标签"的内容soup.find_all(‘a’):\n', all_href)

# A tag behaves like a dict of its attributes.
print('\n获取某个字典的值_1:')
for link in all_href:
    print(link)
    print(link['href'])

all_href = [link['href'] for link in all_href]
print('\n获取某个字典的值_2:\n', all_href, '\n')


# --- Page that carries CSS classes ---
list_url = "https://mofanpy.com/static/scraping/list.html"
html = urlopen(list_url).read().decode('utf-8')
print('\n显示网页的代码信息2:\n\n ----------------开始----------------\n', html, '\n\n----------------结束----------------')  # dump the raw page source

soup = BeautifulSoup(html, features='lxml')

# Select elements by their class attribute.
print('\n利用class筛选出所需要的信息:')
month = soup.find_all('li', {"class": "month"})
print(month, '\n')

print('只显示文本:')
for item in month:
    print(item.get_text())

# Narrow down in two steps: find a parent first, then search inside it.
print('\n 多次筛选:')
january = soup.find('ul', {"class": 'jan'})
print(january, '\n')
d_january = january.find_all('li')
print(d_january, '\n')
for day in d_january:
    print(day.get_text())
|
@@ -0,0 +1,45 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6846
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.request import urlopen
|
||||
import re # 正则模块
|
||||
import requests
|
||||
import os
|
||||
# os.chdir('D:') # 设置文件保存的位置
|
||||
|
||||
|
||||
# Collect the requests interactively (one DOI / link / title per prompt).
address_array = []
for i in range(10):  # at most 10 entries per run
    address = input('\n输入DOI/链接/标题:')
    address_array.append(address)
    continue_or_not = input('\n继续添加(1)/不继续添加(0):')
    try:
        stop = int(continue_or_not) == 0
    except ValueError:
        # Unrecognised answer: keep asking instead of crashing.
        stop = False
    if stop:
        break

# Download each requested document.
for address in address_array:
    r = requests.post('https://sci-hub.st/', data={'request': address})
    print('\n响应结果是:', r)
    print('访问的地址是:', r.url)
    soup = BeautifulSoup(r.text, features='lxml')
    iframe = soup.iframe
    if iframe is None or not iframe.get('src'):
        # No embedded PDF frame in the response: skip this entry and carry on.
        print('未找到PDF,跳过:', address)
        continue
    pdf_URL = iframe['src']
    if not pdf_URL.startswith('https:'):
        pdf_URL = 'https:'+pdf_URL
    print('PDF的地址是:', pdf_URL)
    # Searching the reversed URL finds the last "pdf" and the "/" preceding it;
    # reversing back and dropping the leading "/" leaves the file name.
    match = re.search('fdp.*?/', pdf_URL[::-1])
    if match is None:
        print('未找到PDF,跳过:', address)
        continue
    name = match.group()[::-1][1::]
    print('PDF文件名是:', name)
    print('保存的位置在:', os.getcwd())
    print('\n正在下载')
    r = requests.get(pdf_URL, stream=True)
    with open(name, 'wb') as f:
        # 8 KiB chunks instead of 32 bytes: far fewer tiny writes.
        for chunk in r.iter_content(chunk_size=8192):
            f.write(chunk)
    print('下载完成!')

print('\n全部下载完成!')
|
@@ -0,0 +1,77 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
|
||||
"""
|
||||
|
||||
import PyPDF2
|
||||
import os
|
||||
import re
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.request import urlopen
|
||||
import requests
|
||||
|
||||
|
||||
|
||||
def main():
    """Ask for a PDF name, extract its DOI links, and download the references."""
    os.chdir('D:/')  # directory holding the PDF file
    filename = input('输入PDF文件名:')
    # The context manager closes the file even if link extraction fails.
    with open(filename+'.pdf', 'rb') as pdfFile:
        links = all_links_in_pdf(pdfFile)
    os.chdir('D:/Reference')  # directory where the references are saved
    download(links)
|
||||
|
||||
|
||||
|
||||
def all_links_in_pdf(pdfFile):
    """Print and return the DOI links found in a PDF's link annotations.

    Args:
        pdfFile: Binary file object of the PDF, opened by the caller.

    Returns:
        List of URIs starting with ``https://doi.org``, in page order. A URI
        equal to the previously accepted one on the same page is dropped.
    """
    doi_prefix = re.compile(r'^https://doi\.org')  # only DOI links are kept
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    links = []
    print()
    for page in range(pdfReader.getNumPages()):
        pageObject = pdfReader.getPage(page).getObject()
        if '/Annots' not in pageObject:
            continue
        old = ''
        for a in pageObject['/Annots']:
            u = a.getObject()
            # Skip annotations without an action, and actions without a URI
            # entry; indexing '/URI' unconditionally would raise KeyError.
            if '/A' not in u or '/URI' not in u['/A']:
                continue
            uri = u['/A']['/URI']
            if doi_prefix.search(uri) and uri != old:
                print(len(links), uri)
                links.append(uri)
                old = uri
    return links
|
||||
|
||||
|
||||
|
||||
def download(links, indices=(0, 1, 3)):
    """Download selected references into the current working directory.

    Args:
        links: List of DOI links.
        indices: Positions in ``links`` to download. The default keeps the
            original selection; pass ``range(len(links))`` to fetch all.
            Positions outside the list are reported and skipped.
    """
    for i0 in indices:
        if not 0 <= i0 < len(links):
            print('\n第', i0, '篇不存在,跳过')
            continue
        address = links[i0]
        r = requests.post('https://sci-hub.st/', data={'request': address})
        print('\n响应结果是:', r)
        print('访问的地址是:', r.url)
        soup = BeautifulSoup(r.text, features='lxml')
        iframe = soup.iframe
        if iframe is None or not iframe.get('src'):
            # No embedded PDF frame in the response: skip instead of crashing.
            print('未找到PDF,跳过:', address)
            continue
        pdf_URL = iframe['src']
        if not pdf_URL.startswith('https:'):
            pdf_URL = 'https:'+pdf_URL
        print('PDF的地址是:', pdf_URL)
        # Searching the reversed URL finds the last "pdf" and the "/" before
        # it; reversing back and dropping the "/" leaves the file name.
        match = re.search('fdp.*?/', pdf_URL[::-1])
        if match is None:
            print('未找到PDF,跳过:', address)
            continue
        name = match.group()[::-1][1::]
        print('PDF文件名是:', name)
        print('保存的位置在:', os.getcwd())
        print('\n正在下载第',i0,'篇')
        r = requests.get(pdf_URL, stream=True)
        with open(name, 'wb') as f:
            # 8 KiB chunks instead of 32 bytes: far fewer tiny writes.
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        print('第',i0,'篇下载完成!')
    print('\n全部下载完成!')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
@@ -0,0 +1,30 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
|
||||
"""
|
||||
|
||||
import PyPDF2
|
||||
import os
|
||||
import re
|
||||
|
||||
# List the DOI links found in the link annotations of a PDF.
os.chdir('D:/')  # directory holding the PDF file
filename = input('输入PDF文件名:')
doi_prefix = re.compile(r'^https://doi\.org')  # only DOI links are listed
# The context manager closes the file even if parsing fails part-way.
with open(filename+'.pdf', 'rb') as pdfFile:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    i0 = 0
    for page in range(pages):
        pageObject = pdfReader.getPage(page).getObject()
        if '/Annots' not in pageObject:
            continue
        old = ''
        for a in pageObject['/Annots']:
            u = a.getObject()
            # Skip annotations without an action, and actions without a URI
            # entry; indexing '/URI' unconditionally would raise KeyError.
            if '/A' not in u or '/URI' not in u['/A']:
                continue
            uri = u['/A']['/URI']
            if doi_prefix.search(uri) and uri != old:  # drop immediate repeats
                print(i0 , uri)
                i0 += 1
                old = uri
|
@@ -0,0 +1,21 @@
|
||||
import os
|
||||
import time
|
||||
|
||||
# Run two scripts one after the other and report how long each one took.
start = time.time()

# (start label, code directory, shell command, elapsed-time label)
jobs = (
    ('程序1开始的时间:', 'D:', 'python a.py', '程序1运行时间(min)='),
    ('程序2开始的时间:', 'E:', 'python b.py', '程序2运行时间(min)='),
)
for start_label, code_dir, command, elapsed_label in jobs:
    print(start_label, time.ctime())
    job_start = time.time()
    os.chdir(code_dir)   # switch to where the script lives
    os.system(command)   # run it and wait for it to finish
    job_end = time.time()
    print(elapsed_label, (job_end-job_start)/60, '\n')

end = time.time()
print('总运行时间(min)=', (end-start)/60)
|
@@ -0,0 +1,137 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
    """Search every PDF under a folder for keywords and log the matches.

    For each keyword a ``<keyword>.txt`` report is written to the current
    directory; PDFs that cannot be parsed are listed in ``error.txt``.
    """
    # Settings
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'

    # Locate all PDF files.
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')

    # Start from empty report files.
    with open('error.txt', 'w', encoding='utf-8'):
        pass
    for key_word in key_word_array:
        with open(str(key_word)+'.txt', 'w', encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')

    # Keywords are used as regular expressions; compile each one only once.
    patterns = [(key_word, re.compile(key_word)) for key_word in key_word_array]

    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件,', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word, pattern in patterns:
                if pattern.search(content):
                    print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt', 'a', encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
        except Exception:
            # Best effort: record the failing file and move on. Catching
            # Exception (not a bare except) keeps Ctrl-C working.
            print('出现异常!位置在:\n\n', pdf_file, '\n')
            with open('error.txt', 'a', encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '秒')
    print('\n全部搜索结束!')
    end = time.time()
    print('\n总共用时:', (end-begin)/60, '分')
|
||||
|
||||
|
||||
def find_files_pdf(path):
    """Return the paths of all files under ``path`` whose name ends in ``.pdf``.

    The suffix is compared literally, so a file merely ending in ``pdf``
    (for example ``xpdf``) is not reported.
    """
    return [file0 for file0 in find_files(path) if file0.endswith('.pdf')]


def find_files(path):
    """Return every file below ``path``, one directory level at a time."""
    file_all = []
    path_next_loop = [path]
    while path_next_loop:  # one pass per directory depth
        file_all_in_one_loop, path_next_loop = find_files_loop_module(path_next_loop)
        file_all.extend(file_all_in_one_loop)
    return file_all


def find_files_loop_module(path_all):
    """List one directory level.

    Args:
        path_all: Directories to scan.

    Returns:
        Tuple ``(files, directories)`` of full paths found directly inside
        the given directories. Entries that are neither a file nor a
        directory (such as broken links) are ignored.
    """
    file_all_in_one_loop = []
    path_next_loop = []
    for path in path_all:
        for filename in os.listdir(path):
            full_path = os.path.join(path, filename)
            if os.path.isfile(full_path):
                file_all_in_one_loop.append(full_path)
            elif os.path.isdir(full_path):
                path_next_loop.append(full_path)
    return file_all_in_one_loop, path_next_loop
|
||||
|
||||
|
||||
def get_text_from_pdf(file_path):
    """Return the text of all text boxes in a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file.

    Returns:
        The stripped text of every ``LTTextBox``, joined without separators.

    Raises:
        PDFTextExtractionNotAllowed: If the document forbids text extraction.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    # Keep the file open for the whole extraction and close it afterwards.
    with open(file_path, 'rb') as pdf_stream:
        # Create the parser and the document object, then link them together.
        praser = PDFParser(pdf_stream)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)

        # Initialise without supplying a password.
        doc.initialize()

        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device, and page interpreter.
        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # The result holds the layout objects parsed from this page;
            # only text boxes contribute to the output.
            for x in device.get_result():
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(pieces)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
@@ -0,0 +1,63 @@
|
||||
import os
|
||||
os.chdir('D:/') # PDF文件存放的位置
|
||||
import logging
|
||||
logging.Logger.propagate = False
|
||||
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
|
||||
|
||||
|
||||
def main():
    """Extract the text of ``a.pdf`` and save it as ``a.txt`` (UTF-8)."""
    extracted_text = get_text_from_pdf('a')
    with open('a.txt', 'w', encoding='utf-8') as text_file:
        text_file.write(extracted_text)
|
||||
|
||||
|
||||
def get_text_from_pdf(filename):
    """Return the text of all text boxes in ``<filename>.pdf``, in page order.

    Args:
        filename: PDF file name without the ``.pdf`` extension.

    Returns:
        The stripped text of every ``LTTextBox``, joined without separators.

    Raises:
        PDFTextExtractionNotAllowed: If the document forbids text extraction.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed

    path = filename+".pdf"

    # Keep the file open for the whole extraction and close it afterwards.
    with open(path, 'rb') as pdf_stream:
        # Create the parser and the document object, then link them together.
        praser = PDFParser(pdf_stream)
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)

        # Initialise without supplying a password.
        doc.initialize()

        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed

        # Resource manager, layout-aggregating device, and page interpreter.
        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        pieces = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # The result holds the layout objects parsed from this page;
            # only text boxes contribute to the output.
            for x in device.get_result():
                if isinstance(x, LTTextBox):
                    pieces.append(x.get_text().strip())
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(pieces)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
29
language_learning/2021.05.10_parallel_calculations_using_sh_files/a.py
Executable file
29
language_learning/2021.05.10_parallel_calculations_using_sh_files/a.py
Executable file
@@ -0,0 +1,29 @@
|
||||
import numpy as np
|
||||
|
||||
# Settings
cpus = 7  # number of CPUs to use (equals the number of submitted jobs)
parameter_array_all = np.arange(0, 10, 0.1)  # every parameter to compute

# Job index, modified by the .sh script. It starts from 0 and its largest
# value is cpus-1. Keep the assignment below exactly as written.
job_index = -1

# Work out which slice of the parameters belongs to this job.
len_of_parameter_all = len(parameter_array_all)  # total number of parameters
evenly_divisible = len_of_parameter_all % cpus == 0
# Parameters per job: an equal share when the total divides evenly; otherwise
# the first cpus-1 jobs take equal shares and the last job takes the remainder.
len_parameter = int(len_of_parameter_all / (cpus if evenly_divisible else cpus - 1))
chunk_start = job_index * len_parameter
if not evenly_divisible and job_index == cpus - 1:
    chunk_stop = len_of_parameter_all
else:
    chunk_stop = (job_index + 1) * len_parameter
parameter_array = parameter_array_all[chunk_start:chunk_stop]

# The job itself: write "parameter parameter**2" lines to a per-job file.
with open('a'+str(job_index)+'.txt', 'w') as f:
    f.writelines(
        str(parameter)+' '+str(parameter**2)+'\n'
        for parameter in parameter_array
    )
|
5
language_learning/2021.05.10_parallel_calculations_using_sh_files/a.sh
Executable file
5
language_learning/2021.05.10_parallel_calculations_using_sh_files/a.sh
Executable file
@@ -0,0 +1,5 @@
|
||||
#!/bin/sh
|
||||
#PBS -N task
|
||||
#PBS -l nodes=1:ppn=1
|
||||
export OMP_NUM_THREADS=1
|
||||
python a.py
|
@@ -0,0 +1,6 @@
|
||||
# Concatenate the per-job output files a0.txt ... a6.txt into combine.txt.
# The context manager closes the output file even if one input is missing.
with open('combine.txt', 'w') as f:
    for job_index in range(7):
        with open('a'+str(job_index)+'.txt', 'r') as f0:
            f.write(f0.read())
|
@@ -0,0 +1,16 @@
|
||||
0.0 0.0
|
||||
0.1 0.010000000000000002
|
||||
0.2 0.04000000000000001
|
||||
0.30000000000000004 0.09000000000000002
|
||||
0.4 0.16000000000000003
|
||||
0.5 0.25
|
||||
0.6000000000000001 0.3600000000000001
|
||||
0.7000000000000001 0.4900000000000001
|
||||
0.8 0.6400000000000001
|
||||
0.9 0.81
|
||||
1.0 1.0
|
||||
1.1 1.2100000000000002
|
||||
1.2000000000000002 1.4400000000000004
|
||||
1.3 1.6900000000000002
|
||||
1.4000000000000001 1.9600000000000004
|
||||
1.5 2.25
|
@@ -0,0 +1,16 @@
|
||||
1.6 2.5600000000000005
|
||||
1.7000000000000002 2.8900000000000006
|
||||
1.8 3.24
|
||||
1.9000000000000001 3.6100000000000003
|
||||
2.0 4.0
|
||||
2.1 4.41
|
||||
2.2 4.840000000000001
|
||||
2.3000000000000003 5.290000000000001
|
||||
2.4000000000000004 5.760000000000002
|
||||
2.5 6.25
|
||||
2.6 6.760000000000001
|
||||
2.7 7.290000000000001
|
||||
2.8000000000000003 7.840000000000002
|
||||
2.9000000000000004 8.410000000000002
|
||||
3.0 9.0
|
||||
3.1 9.610000000000001
|
@@ -0,0 +1,16 @@
|
||||
3.2 10.240000000000002
|
||||
3.3000000000000003 10.890000000000002
|
||||
3.4000000000000004 11.560000000000002
|
||||
3.5 12.25
|
||||
3.6 12.96
|
||||
3.7 13.690000000000001
|
||||
3.8000000000000003 14.440000000000001
|
||||
3.9000000000000004 15.210000000000003
|
||||
4.0 16.0
|
||||
4.1000000000000005 16.810000000000006
|
||||
4.2 17.64
|
||||
4.3 18.49
|
||||
4.4 19.360000000000003
|
||||
4.5 20.25
|
||||
4.6000000000000005 21.160000000000004
|
||||
4.7 22.090000000000003
|
@@ -0,0 +1,16 @@
|
||||
4.800000000000001 23.040000000000006
|
||||
4.9 24.010000000000005
|
||||
5.0 25.0
|
||||
5.1000000000000005 26.010000000000005
|
||||
5.2 27.040000000000003
|
||||
5.300000000000001 28.090000000000007
|
||||
5.4 29.160000000000004
|
||||
5.5 30.25
|
||||
5.6000000000000005 31.360000000000007
|
||||
5.7 32.49
|
||||
5.800000000000001 33.64000000000001
|
||||
5.9 34.81
|
||||
6.0 36.0
|
||||
6.1000000000000005 37.21000000000001
|
||||
6.2 38.440000000000005
|
||||
6.300000000000001 39.69000000000001
|
@@ -0,0 +1,16 @@
|
||||
6.4 40.96000000000001
|
||||
6.5 42.25
|
||||
6.6000000000000005 43.56000000000001
|
||||
6.7 44.89
|
||||
6.800000000000001 46.24000000000001
|
||||
6.9 47.61000000000001
|
||||
7.0 49.0
|
||||
7.1000000000000005 50.41000000000001
|
||||
7.2 51.84
|
||||
7.300000000000001 53.29000000000001
|
||||
7.4 54.760000000000005
|
||||
7.5 56.25
|
||||
7.6000000000000005 57.760000000000005
|
||||
7.7 59.290000000000006
|
||||
7.800000000000001 60.84000000000001
|
||||
7.9 62.410000000000004
|
@@ -0,0 +1,16 @@
|
||||
8.0 64.0
|
||||
8.1 65.61
|
||||
8.200000000000001 67.24000000000002
|
||||
8.3 68.89000000000001
|
||||
8.4 70.56
|
||||
8.5 72.25
|
||||
8.6 73.96
|
||||
8.700000000000001 75.69000000000001
|
||||
8.8 77.44000000000001
|
||||
8.9 79.21000000000001
|
||||
9.0 81.0
|
||||
9.1 82.80999999999999
|
||||
9.200000000000001 84.64000000000001
|
||||
9.3 86.49000000000001
|
||||
9.4 88.36000000000001
|
||||
9.5 90.25
|
@@ -0,0 +1,4 @@
|
||||
9.600000000000001 92.16000000000003
|
||||
9.700000000000001 94.09000000000002
|
||||
9.8 96.04000000000002
|
||||
9.9 98.01
|
@@ -0,0 +1,100 @@
|
||||
0.0 0.0
|
||||
0.1 0.010000000000000002
|
||||
0.2 0.04000000000000001
|
||||
0.30000000000000004 0.09000000000000002
|
||||
0.4 0.16000000000000003
|
||||
0.5 0.25
|
||||
0.6000000000000001 0.3600000000000001
|
||||
0.7000000000000001 0.4900000000000001
|
||||
0.8 0.6400000000000001
|
||||
0.9 0.81
|
||||
1.0 1.0
|
||||
1.1 1.2100000000000002
|
||||
1.2000000000000002 1.4400000000000004
|
||||
1.3 1.6900000000000002
|
||||
1.4000000000000001 1.9600000000000004
|
||||
1.5 2.25
|
||||
1.6 2.5600000000000005
|
||||
1.7000000000000002 2.8900000000000006
|
||||
1.8 3.24
|
||||
1.9000000000000001 3.6100000000000003
|
||||
2.0 4.0
|
||||
2.1 4.41
|
||||
2.2 4.840000000000001
|
||||
2.3000000000000003 5.290000000000001
|
||||
2.4000000000000004 5.760000000000002
|
||||
2.5 6.25
|
||||
2.6 6.760000000000001
|
||||
2.7 7.290000000000001
|
||||
2.8000000000000003 7.840000000000002
|
||||
2.9000000000000004 8.410000000000002
|
||||
3.0 9.0
|
||||
3.1 9.610000000000001
|
||||
3.2 10.240000000000002
|
||||
3.3000000000000003 10.890000000000002
|
||||
3.4000000000000004 11.560000000000002
|
||||
3.5 12.25
|
||||
3.6 12.96
|
||||
3.7 13.690000000000001
|
||||
3.8000000000000003 14.440000000000001
|
||||
3.9000000000000004 15.210000000000003
|
||||
4.0 16.0
|
||||
4.1000000000000005 16.810000000000006
|
||||
4.2 17.64
|
||||
4.3 18.49
|
||||
4.4 19.360000000000003
|
||||
4.5 20.25
|
||||
4.6000000000000005 21.160000000000004
|
||||
4.7 22.090000000000003
|
||||
4.800000000000001 23.040000000000006
|
||||
4.9 24.010000000000005
|
||||
5.0 25.0
|
||||
5.1000000000000005 26.010000000000005
|
||||
5.2 27.040000000000003
|
||||
5.300000000000001 28.090000000000007
|
||||
5.4 29.160000000000004
|
||||
5.5 30.25
|
||||
5.6000000000000005 31.360000000000007
|
||||
5.7 32.49
|
||||
5.800000000000001 33.64000000000001
|
||||
5.9 34.81
|
||||
6.0 36.0
|
||||
6.1000000000000005 37.21000000000001
|
||||
6.2 38.440000000000005
|
||||
6.300000000000001 39.69000000000001
|
||||
6.4 40.96000000000001
|
||||
6.5 42.25
|
||||
6.6000000000000005 43.56000000000001
|
||||
6.7 44.89
|
||||
6.800000000000001 46.24000000000001
|
||||
6.9 47.61000000000001
|
||||
7.0 49.0
|
||||
7.1000000000000005 50.41000000000001
|
||||
7.2 51.84
|
||||
7.300000000000001 53.29000000000001
|
||||
7.4 54.760000000000005
|
||||
7.5 56.25
|
||||
7.6000000000000005 57.760000000000005
|
||||
7.7 59.290000000000006
|
||||
7.800000000000001 60.84000000000001
|
||||
7.9 62.410000000000004
|
||||
8.0 64.0
|
||||
8.1 65.61
|
||||
8.200000000000001 67.24000000000002
|
||||
8.3 68.89000000000001
|
||||
8.4 70.56
|
||||
8.5 72.25
|
||||
8.6 73.96
|
||||
8.700000000000001 75.69000000000001
|
||||
8.8 77.44000000000001
|
||||
8.9 79.21000000000001
|
||||
9.0 81.0
|
||||
9.1 82.80999999999999
|
||||
9.200000000000001 84.64000000000001
|
||||
9.3 86.49000000000001
|
||||
9.4 88.36000000000001
|
||||
9.5 90.25
|
||||
9.600000000000001 92.16000000000003
|
||||
9.700000000000001 94.09000000000002
|
||||
9.8 96.04000000000002
|
||||
9.9 98.01
|
14
language_learning/2021.05.10_parallel_calculations_using_sh_files/task.sh
Executable file
14
language_learning/2021.05.10_parallel_calculations_using_sh_files/task.sh
Executable file
@@ -0,0 +1,14 @@
|
||||
#!/bin/sh
# Create one numbered copy of a.py and a.sh per job index and submit each
# copy with qsub.

# Stop at the first failing command, so that a job is never submitted with a
# script that could not be copied or patched.
set -e

for job_index in 0 1 2 3 4 5 6
do
    # Copy the Python template and replace its "job_index = -1" placeholder
    # with the index of this job.
    cp a.py "a${job_index}.py"
    sed -i "s/job_index = -1/job_index = ${job_index}/" "a${job_index}.py"

    # Copy the submission script, point it at this job's Python file, and
    # submit it.
    cp a.sh "a${job_index}.sh"
    sed -i "s/python a.py/python a${job_index}.py/" "a${job_index}.sh"
    qsub "a${job_index}.sh"
done
|
@@ -0,0 +1,24 @@
|
||||
import os
|
||||
|
||||
def find_files_and_directory(path):
    """Split the direct entries of one or more directories into files and non-files.

    Only the immediate contents of each directory are listed; sub-directories
    are not descended into.

    Args:
        path: An iterable of directory paths. A single path given as ``str``,
            ``bytes`` or ``os.PathLike`` is also accepted and handled as a
            one-element list (previously a bare string was iterated
            character by character).

    Returns:
        A tuple ``(file, directory)`` of two lists. ``file`` holds the joined
        path of every entry for which ``os.path.isfile`` is true;
        ``directory`` holds the joined path of every other entry.

    Raises:
        OSError: Propagated from ``os.listdir`` when a directory cannot be
            listed (for example, it does not exist).
    """
    if isinstance(path, (str, bytes, os.PathLike)):
        path = [path]
    file = []
    directory = []
    # A separate loop name keeps the ``path`` argument from being shadowed.
    for folder in path:
        for entry in os.listdir(folder):
            full_name = os.path.join(folder, entry)
            if os.path.isfile(full_name):  # regular file
                file.append(full_name)
            else:  # everything else is reported as a directory
                directory.append(full_name)
    return file, directory
|
||||
|
||||
# List the folders directly under the storage directory, then report every
# folder whose number of direct files differs from 3.
_, item_folders = find_files_and_directory(['E:/zotero/storage'])

mismatch_count = 0
for folder in item_folders:
    files_in_folder, _ = find_files_and_directory([folder])
    file_count = len(files_in_folder)
    if file_count == 3:
        continue
    mismatch_count += 1
    print(folder, '文件夹中有', file_count, '个文件')
print('文件夹中文件个数不等于3的总个数:', mismatch_count)
|
@@ -0,0 +1,89 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.request import urlopen
|
||||
import re
|
||||
from collections import Counter
|
||||
import datetime
|
||||
|
||||
|
||||
"""
|
||||
支持APS系列的首页和recent页面。
|
||||
例如PRB期刊:https://journals.aps.org/prb 和 https://journals.aps.org/prb/recent
|
||||
请勿大量、循环运行爬虫,防止IP地址被官网拉入黑名单。
|
||||
"""
|
||||
|
||||
|
||||
# Page to scan. Swap in one of the commented alternatives to scan another
# APS journal page.
visit_link = "https://journals.aps.org/prb"
# visit_link = "https://journals.aps.org/prb/recent"
# visit_link = "https://journals.aps.org/prl"
# visit_link = "https://journals.aps.org/prl/recent"


# Collect the links to article abstract pages found on that page.
html = urlopen(visit_link).read().decode('utf-8')
soup = BeautifulSoup(html, features='lxml')
all_a_tag = soup.find_all('a', href=True)
match_href = []
for a_tag in all_a_tag:
    href = a_tag['href']
    is_abstract_link = (re.search(r'https://journals.aps.org/.*/abstract', href)
                        or re.search(r'.*/abstract/', href))
    if not is_abstract_link:
        continue
    if '?' in href:  # links that carry a query string are left out
        continue
    # Make the link absolute BEFORE the duplicate check: the list stores
    # absolute links, so comparing the raw relative href against it would
    # never detect a repeated relative link.
    if re.search(r'https://journals.aps.org', href) is None:
        href = 'https://journals.aps.org' + href
    if href not in match_href:
        match_href.append(href)
print('\n当前页面总共有', len(match_href), '篇文章。\n')
|
||||
|
||||
|
||||
# Prepare to collect the words of each article abstract.
i0 = 0  # running count of the articles read so far
# Read the clock once: three separate now() calls could fall on different
# sides of midnight and produce a year/month/day that never existed together.
now = datetime.datetime.now()
year, month, day = now.year, now.month, now.day
# Output file named "<year>.<MM>.<DD>_word_list.txt"; the handle stays open
# for the article loop that follows and is closed after it.
f = open('{}.{:02d}.{:02d}_word_list.txt'.format(year, month, day), 'w', encoding='UTF-8')
|
||||
# Words and markup fragments that are left out when abstract words are
# collected: common English function words, generic paper vocabulary, and
# HTML leftovers such as '<math' or 'alt="Figure'. Entries are compared with
# `in`, i.e. exact and case-sensitive, which is why capitalised and punctuated
# variants ('The', 'However,', 'state.') are listed separately.
# NOTE(review): some entries are repeated (e.g. 'show', 'into', 'study') and
# a few look misspelled ('theretically', 'dirven'); a misspelled entry only
# filters the same misspelling. Confirm whether that is intended.
ignore_inner = ['alt="Figure', 'the', '<math', 'to', 'of', 'in', 'under', '<i', 'and', 'by', 'The', 'at', 'with', 'up', 'be', 'above', 'below', 'are', 'is', 'for', 'that', 'as', 'we', '<a', 'abstract', 'abstract"','<span', 'which', 'We', 'such', 'has', 'two', 'these', 'it', 'all', 'results', 'result', 'each', 'have', 'between', 'on', 'an', 'can', 'also', 'from', 'Our', 'our', 'using', 'where', 'These', 'out', 'both', 'due', 'less', 'along', 'but', 'In', 'show', 'into', 'study', 'find', 'provide', 'change', '(<math','not', 'open', 'this', 'show', 'into', 'study', 'find', 'provide', 'change', 'present', 'Using', 'large', 'This', 'However', 'appear', 'studied', 'obtain', 'been', 'Both', 'they', 'effects', 'effect', 'compute', 'more', 'does', 'shown', 'Based', 'reveal', 'highly', 'number', 'However,', 'was', 'near', 'full', 'based', 'several', 'suggest', 'agreement', 'predicted', 'values', 'work', 'emphasize', 'without', 'or', 'work,', 'studies', 'future', 'identify', 'present.', 'predict', 'presence', 'their', 'were', 'From', 'its', 'By', 'how', 'ground', 'observed', 'recent', 'For', 'other', 'Here', 'test', 'further', 'Its', 'similar', 'however,', 'range', 'within', 'value', 'possible', 'may', 'than', 'low', 'us', 'obtained', 'around', 'consider', 'about', 'very', 'will', 'when', 'played', 'consist', 'consists', 'Here,', 'observe', 'gives', 'It', 'over', 'cannot', 'As', 'whose', 'new', 'some', 'only', 'from', 'yields', 'shows', 'data', 'direct', 'related', 'different', 'evidence', 'role', 'function', 'origin', 'specific', 'set', 'confirm', 'give', 'Moreover', 'develop', 'including', 'could', 'used', 'means', 'allows', 'make', 'e.g.,', 'provides', 'system', 'systems', 'field', 'fields', 'model', 'model,', 'state', 'states', 'states.', 'state.', 'band', 'bands', 'method', 'methods', 'nature', 'rate', 'zero', 'single', 'theory', 'first', 'one', 'complex', 'approach', 'schemes', 'terms', 'even', 'case', 'analysis', 'weight', 'volume', 'evolution', 'well', 'external', 
'measured', 'introducing', 'dependence', 'properties', 'demonstrate', 'remains', 'through', 'measurements', 'samples', 'findings', 'respect', 'investigate', 'behavior', 'importance', 'considered', 'experimental', 'increase', 'propose', 'follows', 'increase', 'emerged', 'interesting', 'behaviors', 'influenced', 'paramount', 'indicate', 'Rev.', 'concepts', 'induced', 'zone', 'regions', 'exact', 'contribution', 'behavior', 'formation', 'measurements.', 'utilizing', 'constant', 'regime', 'features', 'strength', 'compare', 'determined', 'combination', 'compare', 'determined', 'At', 'inside', 'ambient', 'then', 'important', 'report', 'Moreover,', 'Despite', 'found', 'because', 'process', 'and,', 'significantly', 'realized', 'much', 'natural', 'since', 'grows', 'any', 'compared', 'while', 'forms.', 'appears', 'indicating', 'coefficient', 'suggested', 'time', 'exhibits', 'calculations.', 'developed', 'array', 'discuss', 'field', 'becomes', 'allowing', 'indicates', 'via', 'introduce', 'considering', 'times.', 'constructed', 'explain', 'form', 'owing', 'parameters.', 'parameter', 'operation', 'probe', 'experiments', 'interest', 'strategies', 'seen', 'emerge', 'generic', 'geometry', 'numbers', 'observation', 'avenue', 'theretically', 'three', 'excellent', 'amount', 'notable', 'example', 'being', 'promising', 'latter', 'little', 'imposed', 'put', 'resource', 'together', 'produce', 'successfully','there', 'enhanced', 'this', 'great', 'dirven', 'increasing','should', 'otherwise', 'Further', 'field,', 'known', 'changes', 'still', 'beyond', 'various', 'center', 'previously', 'way', 'peculiar', 'detailed', 'understanding', 'good', 'years', 'where', 'Me', 'origins', 'years.', 'attributed', 'known,', 'them', 'reported', 'no', 'systems', 'agree', 'examined', 'rise', 'calculate', 'those', 'particular', 'relation', 'defined', 'either', 'again', 'current', 'exhibit', 'calculated', 'here', 'made', 'Further', 'consisting', 'constitutes', 'originated', 'if', 'exceed', 'access']
|
||||
# Visit every article page and append the words of its abstract to the
# word-list file; each word is written at most once per article.
ignored_words = set(ignore_inner)  # set lookup instead of scanning the list for every word
abstract_pattern = re.compile('<a name="abstract">.*<li>Received', re.S)
try:
    for i0, href in enumerate(match_href, start=1):
        print('正在读取第', i0, '篇')
        html = urlopen(href).read().decode('utf-8')
        abstract_match = abstract_pattern.search(html)
        if abstract_match is None:
            # The page lacks the expected abstract markers: skip it rather
            # than abort the whole run with an IndexError.
            print('未找到摘要,跳过:', href)
            continue
        words_seen = set()  # words already written for this article
        for word in abstract_match.group(0).split(' '):
            if not (1 < len(word) < 35):
                continue
            if word in ignored_words or word in words_seen:
                continue
            # Drop tokens that are pieces of HTML markup rather than text.
            if 'class=' in word or 'data-' in word or '><' in word:
                continue
            words_seen.add(word)
            f.write(str(word)+' ')
finally:
    # Close the output file even when a download or decode step fails.
    f.close()
|
||||
|
||||
|
||||
"""
|
||||
运行一次后,以上的代码可以注释,不需要多次访问网址。
|
||||
以下代码调用的是上面代码生成的txt文件,可个性选择忽略的词汇,多次运行调试。
|
||||
"""
|
||||
|
||||
|
||||
# Extra words to ignore in the statistics (add or remove entries as needed).
ignore = []

# Read the clock once so that year/month/day always describe the same day.
now = datetime.datetime.now()
year, month, day = now.year, now.month, now.day
# Load the word list written under today's date, "<year>.<MM>.<DD>_word_list.txt".
with open('{}.{:02d}.{:02d}_word_list.txt'.format(year, month, day), 'r', encoding='UTF-8') as f:
    word_list = f.read().split(' ')
# Keep words of 2 to 34 characters that are not in the ignore list.
effective_words = [word for word in word_list
                   if 1 < len(word) < 35 and word not in ignore]


# Print the `num` most frequent words as (word, count) pairs.
num = 50
most_common_words = Counter(effective_words).most_common(num)
print('\n出现频率最高的前', num, '个词汇:')
for word in most_common_words:
    print(word)
print()
|
@@ -0,0 +1,59 @@
|
||||
"""
|
||||
This code is supported by the website: https://www.guanjihuan.com
|
||||
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
|
||||
"""
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
from urllib.request import urlopen
|
||||
import re
|
||||
from collections import Counter
|
||||
import datetime
|
||||
import random
|
||||
import time
|
||||
|
||||
# Words and markup fragments that are left out when abstract words are
# collected: common English function words, generic paper vocabulary, and
# HTML leftovers such as '<math' or 'alt="Figure'. Entries are compared with
# `in`, i.e. exact and case-sensitive, which is why capitalised and punctuated
# variants ('The', 'However,', 'state.') are listed separately.
# NOTE(review): some entries are repeated (e.g. 'show', 'into', 'study') and
# a few look misspelled ('theretically', 'dirven'); a misspelled entry only
# filters the same misspelling. Confirm whether that is intended.
ignore_inner = ['alt="Figure', 'the', '<math', 'to', 'of', 'in', 'under', '<i', 'and', 'by', 'The', 'at', 'with', 'up', 'be', 'above', 'below', 'are', 'is', 'for', 'that', 'as', 'we', '<a', 'abstract', 'abstract"','<span', 'which', 'We', 'such', 'has', 'two', 'these', 'it', 'all', 'results', 'result', 'each', 'have', 'between', 'on', 'an', 'can', 'also', 'from', 'Our', 'our', 'using', 'where', 'These', 'out', 'both', 'due', 'less', 'along', 'but', 'In', 'show', 'into', 'study', 'find', 'provide', 'change', '(<math','not', 'open', 'this', 'show', 'into', 'study', 'find', 'provide', 'change', 'present', 'Using', 'large', 'This', 'However', 'appear', 'studied', 'obtain', 'been', 'Both', 'they', 'effects', 'effect', 'compute', 'more', 'does', 'shown', 'Based', 'reveal', 'highly', 'number', 'However,', 'was', 'near', 'full', 'based', 'several', 'suggest', 'agreement', 'predicted', 'values', 'work', 'emphasize', 'without', 'or', 'work,', 'studies', 'future', 'identify', 'present.', 'predict', 'presence', 'their', 'were', 'From', 'its', 'By', 'how', 'ground', 'observed', 'recent', 'For', 'other', 'Here', 'test', 'further', 'Its', 'similar', 'however,', 'range', 'within', 'value', 'possible', 'may', 'than', 'low', 'us', 'obtained', 'around', 'consider', 'about', 'very', 'will', 'when', 'played', 'consist', 'consists', 'Here,', 'observe', 'gives', 'It', 'over', 'cannot', 'As', 'whose', 'new', 'some', 'only', 'from', 'yields', 'shows', 'data', 'direct', 'related', 'different', 'evidence', 'role', 'function', 'origin', 'specific', 'set', 'confirm', 'give', 'Moreover', 'develop', 'including', 'could', 'used', 'means', 'allows', 'make', 'e.g.,', 'provides', 'system', 'systems', 'field', 'fields', 'model', 'model,', 'state', 'states', 'states.', 'state.', 'band', 'bands', 'method', 'methods', 'nature', 'rate', 'zero', 'single', 'theory', 'first', 'one', 'complex', 'approach', 'schemes', 'terms', 'even', 'case', 'analysis', 'weight', 'volume', 'evolution', 'well', 'external', 
'measured', 'introducing', 'dependence', 'properties', 'demonstrate', 'remains', 'through', 'measurements', 'samples', 'findings', 'respect', 'investigate', 'behavior', 'importance', 'considered', 'experimental', 'increase', 'propose', 'follows', 'increase', 'emerged', 'interesting', 'behaviors', 'influenced', 'paramount', 'indicate', 'Rev.', 'concepts', 'induced', 'zone', 'regions', 'exact', 'contribution', 'behavior', 'formation', 'measurements.', 'utilizing', 'constant', 'regime', 'features', 'strength', 'compare', 'determined', 'combination', 'compare', 'determined', 'At', 'inside', 'ambient', 'then', 'important', 'report', 'Moreover,', 'Despite', 'found', 'because', 'process', 'and,', 'significantly', 'realized', 'much', 'natural', 'since', 'grows', 'any', 'compared', 'while', 'forms.', 'appears', 'indicating', 'coefficient', 'suggested', 'time', 'exhibits', 'calculations.', 'developed', 'array', 'discuss', 'field', 'becomes', 'allowing', 'indicates', 'via', 'introduce', 'considering', 'times.', 'constructed', 'explain', 'form', 'owing', 'parameters.', 'parameter', 'operation', 'probe', 'experiments', 'interest', 'strategies', 'seen', 'emerge', 'generic', 'geometry', 'numbers', 'observation', 'avenue', 'theretically', 'three', 'excellent', 'amount', 'notable', 'example', 'being', 'promising', 'latter', 'little', 'imposed', 'put', 'resource', 'together', 'produce', 'successfully','there', 'enhanced', 'this', 'great', 'dirven', 'increasing','should', 'otherwise', 'Further', 'field,', 'known', 'changes', 'still', 'beyond', 'various', 'center', 'previously', 'way', 'peculiar', 'detailed', 'understanding', 'good', 'years', 'where', 'Me', 'origins', 'years.', 'attributed', 'known,', 'them', 'reported', 'no', 'systems', 'agree', 'examined', 'rise', 'calculate', 'those', 'particular', 'relation', 'defined', 'either', 'again', 'current', 'exhibit', 'calculated', 'here', 'made', 'Further', 'consisting', 'constitutes', 'originated', 'if', 'exceed', 'access']
|
||||
# For each journal's "recent" page: gather the words of every article
# abstract, then put a dated line with the `num` most frequent words at the
# top of that journal's history file.
num = 50  # how many of the most frequent words are recorded per journal

# Read the clock once so the date stamp cannot mix two different days.
today = datetime.datetime.now()
date_stamp = '{}.{:02d}.{:02d}'.format(today.year, today.month, today.day)

ignored_words = set(ignore_inner)  # set lookup instead of scanning the list for every word
abstract_pattern = re.compile('<a name="abstract">.*<li>Received', re.S)
# A token containing any of these is a piece of HTML markup, not a word.
markup_fragments = ('class=', 'data-', '<', '>', 'href')

# (page to scan, history file that receives the new entry)
journal_pages = (
    ("https://journals.aps.org/prb/recent", 'prb_recent_most_common_words.txt'),
    ("https://journals.aps.org/prl/recent", 'prl_recent_most_common_words.txt'),
)

for visit_link, history_file in journal_pages:
    # Earlier entries are kept below the new one. A history file that does
    # not exist yet is treated as empty instead of aborting the run.
    try:
        with open(history_file, 'r', encoding='UTF-8') as f0:
            content_before = f0.read()
    except FileNotFoundError:
        content_before = ''

    # Collect the links to article abstract pages.
    html = urlopen(visit_link).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    match_href = []
    for a_tag in soup.find_all('a', href=True):
        href = a_tag['href']
        is_abstract_link = (re.search(r'https://journals.aps.org/.*/abstract', href)
                            or re.search(r'.*/abstract/', href))
        if not is_abstract_link or '?' in href:
            continue
        # Make the link absolute BEFORE the duplicate check: the list stores
        # absolute links, so a raw relative href would never be found in it.
        if re.search(r'https://journals.aps.org', href) is None:
            href = 'https://journals.aps.org' + href
        if href not in match_href:
            match_href.append(href)

    # Gather the abstract words; each word counts at most once per article.
    all_word_list = []
    for href in match_href:
        time.sleep(random.uniform(0, 2))  # random pause of up to 2 s between requests
        html = urlopen(href).read().decode('utf-8')
        abstract_match = abstract_pattern.search(html)
        if abstract_match is None:
            # No abstract markers on this page: contribute no words rather
            # than abort the whole run with an IndexError.
            continue
        words_seen = set()
        for word in abstract_match.group(0).split(' '):
            if not (1 < len(word) < 35):
                continue
            if word in ignored_words or word in words_seen:
                continue
            if any(fragment in word for fragment in markup_fragments):
                continue
            words_seen.add(word)
            all_word_list.append(word)

    most_common_words = Counter(all_word_list).most_common(num)

    # The file is opened for writing only now, after all downloads have
    # succeeded, so a network failure can no longer wipe the history.
    with open(history_file, 'w', encoding='UTF-8') as f:
        f.write(date_stamp+'|number_of_papers='+str(len(match_href)))
        for word in most_common_words:
            f.write('|'+str(word))
        f.write('\n\n')
        f.write(content_before)
|
Reference in New Issue
Block a user