This commit is contained in:
2021-06-09 04:38:18 +08:00
commit 14a297b604
137 changed files with 8965 additions and 0 deletions

View File

@@ -0,0 +1,76 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/417
"""
import numpy as np
# Python基本操作【循环判断函数文件写入】
for i in range(5):  # Loop: only a for loop is shown here; look up while loops separately
    print('我是循环产生的数:', i)  # Python has no `end` keyword, so indentation defines the loop body
    if i == 2:  # Conditional
        print('判断:我是第三个数 2')
    else:
        pass  # `pass` does nothing; it is a placeholder so the empty branch is valid and can be filled in later
print()  # Print an empty line
def fun0(arg):
    """Print the received argument and return it multiplied by two."""
    print('我是函数中的内容,参数值为:', arg)
    doubled = arg * 2
    return doubled


print('函数返回值:', fun0(5), '\n')  # Call the function; the trailing '\n' adds a line break
# 关于类class这里不举例了。科学计算中主要还是面向过程面向对象用的比较少。有需要了解的可以自行搜索资料。
# File writing
# First way: the `with` block closes the file automatically.
# The encoding is given explicitly because the text is non-ASCII and the
# platform default encoding differs between systems.
with open('test1.txt', 'w', encoding='utf-8') as f1:  # 'w' overwrites the file; use 'a' to append
    f1.write(str(100) + '\n这是第一种方式写入文件')  # str() converts the number to a string
# Second way: explicit open/close. try/finally guarantees the file is closed
# even if write() raises.
f2 = open('test2.txt', 'w', encoding='utf-8')  # open the file
try:
    f2.write(str(200) + '\n这是第二种方式写入文件')  # write to the file
finally:
    f2.close()  # close the file
# Commonly used NumPy statements
print('零矩阵:\n', np.zeros((2, 3)))  # np.zeros() takes the shape as a tuple, hence the double parentheses
print('单位矩阵:\n', np.identity(3))  # 3x3 identity matrix; np.eye() also works
print('把一维数组按对角矩阵排列:\n', np.diag([1, 3, 5]), '\n')
print('指定步长的等差数列:\n', np.arange(1, 5, .5))  # fixed step; half-open interval [1, 5)
print('指定个数的等差数列:\n', np.linspace(-2, 2, 5), '\n')  # fixed count (5); closed interval [-2, 2]
print('随机数:\n', np.random.uniform(-2, 2))  # half-open interval [-2, 2)
print('随机整数:\n', np.random.randint(-10, 10), '\n')  # half-open interval [-10, 10): the upper bound is excluded
print('数组从小到大排列:\n', np.sort([1, 7, 0, 3]))
print('数组从小到大排列对应的索引:\n', np.argsort([1, 7, 0, 3]), '\n')  # indices start at 0 in Python
matrix0 = np.array([[1, 2+9j, 3], [2, 5, 7]])
print('矩阵0\n', matrix0)
print('矩阵的维度:\n', matrix0.shape)  # shape of the matrix
print('矩阵的行数:\n', matrix0.shape[0])  # number of rows
print('矩阵的列数:\n', matrix0.shape[1])  # number of columns
print('矩阵转置:\n', matrix0.transpose())  # transpose
print('矩阵转置共轭:\n', matrix0.transpose().conj(), '\n')  # conjugate transpose
matrix1 = np.array([[3, 5], [2, 7]])  # a NumPy array
eigenvalue, eigenvector = np.linalg.eig(matrix1)  # eigenvalues and eigenvectors
print('矩阵1\n', matrix1)
print('本征值:\n', eigenvalue)
print('本征向量:\n', eigenvector)  # each column is one eigenvector
print('逆矩阵:\n', np.linalg.inv(matrix1))
print('计算行列式:\n', np.linalg.det(matrix1), '\n')
matrix2 = np.array([[1, 2], [3, 4]])
print('矩阵2\n', matrix2)
print('矩阵1和矩阵2相乘\n', np.matmul(matrix1, matrix2), '\n')  # matrix product; np.dot() also works here
a = np.array([1, 2])
print('数组a=', a)
b = np.array([3, 4])
print('数组b=', b)
print('增加元素:\n', np.append(a, b, axis=0))  # append elements
print('增加行:\n', np.append([a], [b], axis=0))  # append a row (column counts must match); np.vstack(([a], [b])) is equivalent
print('增加列:\n', np.append([a], [b], axis=1))  # append columns (row counts must match); np.column_stack(([a], [b])) is equivalent

View File

@@ -0,0 +1,39 @@
import tensorflow as tf # 导入tensorflow
# NOTE(review): tf.Session / tf.placeholder / tf.global_variables_initializer are
# TensorFlow 1.x graph-mode APIs; presumably this script targets TF 1.x — confirm
# the installed version before running.
greeting = tf.constant('Hello Google Tensorflow!')  # define a constant
# First way: open and close the session explicitly
sess = tf.Session()  # start a session
result = sess.run(greeting)  # evaluate `greeting` inside the session
print(result)  # show the result
sess.close()  # close the session
# Second way: let a `with` block manage the session
with tf.Session() as sess:  # start a session
    print(sess.run(greeting))  # evaluate and print
# Example 1
matrix1 = tf.constant([[1., 3.]])  # constant matrix 1, via tf.constant()
matrix2 = tf.constant([[2.], [2.]])  # constant matrix 2, via tf.constant()
product = tf.matmul(matrix1, matrix2)  # matrix product, via tf.matmul()
linear = tf.add(product, tf.constant(2.))  # add a constant to the product, via tf.add()
with tf.Session() as sess:  # start a session
    print(sess.run(matrix1))  # evaluate and print
    print(sess.run(linear))  # evaluate and print
print(linear)  # printing the tensor itself does not show the computed value (nothing has been run); per the original note it prints Tensor("Add:0", shape=(1, 1), dtype=float32)
# Example 2: variables, tf.Variable()
state = tf.Variable(3, name='counter')  # a variable
init = tf.global_variables_initializer()  # required whenever variables are defined: builds the op that initialises them
with tf.Session() as sess:
    sess.run(init)  # variables must be initialised before use
    print(sess.run(state))  # evaluate and print
# Example 3: placeholders, tf.placeholder(); they reserve a slot whose value is supplied through feed_dict
x1 = tf.placeholder(tf.float32)
x2 = tf.placeholder(tf.float32)
y = x1 + x2
with tf.Session() as sess:
    print(sess.run(y, feed_dict={x1: 7, x2: 2}))

View File

@@ -0,0 +1,77 @@
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
def add_layer(inputs, in_size, out_size, activation_function=None):
    """Build one fully connected layer and return its output tensor.

    Args:
        inputs: tensor fed into the layer; it is matrix-multiplied with an
            ``[in_size, out_size]`` weight variable.
        in_size: number of input features.
        out_size: number of neurons in the layer.
        activation_function: optional callable applied to the pre-activation
            values; ``None`` leaves the layer linear.

    Returns:
        The layer output (one value per neuron, i.e. ``out_size`` columns).
    """
    weights = tf.Variable(tf.random_normal([in_size, out_size]))  # random initial weights
    biases = tf.Variable(tf.zeros([1, out_size]) + 0.1)  # biases start at 0.1
    pre_activation = tf.matmul(inputs, weights) + biases
    if activation_function is None:
        return pre_activation
    return activation_function(pre_activation)
# Generate the training data
x_data = np.linspace(-1, 1, 300, dtype=np.float32)[:, np.newaxis]  # network inputs; [:, np.newaxis] adds an axis so the 300 samples form a column matrix
noise = np.random.normal(0, 0.05, x_data.shape).astype(np.float32)  # Gaussian noise with the same shape as x_data
y_data = np.square(x_data) - 0.5 + noise  # training targets: x^2 - 0.5 plus noise
print(x_data.shape)  # inspect the array shape
print(noise.shape)  # inspect the array shape
print(y_data.shape)  # inspect the array shape
print()  # print an empty line
# Build the neural-network model
xs = tf.placeholder(tf.float32, [None, 1])  # placeholder for the training inputs; None allows any number of samples
ys = tf.placeholder(tf.float32, [None, 1])  # placeholder for the training targets
l1 = add_layer(xs, 1, 10, activation_function=tf.nn.relu)  # hidden layer with 10 neurons and ReLU activation
prediction = add_layer(l1, 10, 1, activation_function=None)  # linear output layer
loss = tf.reduce_mean(tf.reduce_sum(tf.square(ys - prediction), reduction_indices=[1]))  # loss: squared error summed over axis 1, then averaged
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(loss)  # gradient descent with learning rate 0.1
init = tf.global_variables_initializer()  # op that initialises all variables
# Scatter plot of the raw input/output data
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x_data, y_data)
plt.ion()  # interactive mode, so plt.show() does not block the training loop
plt.show()  # show the figure
# Train the neural network
sess = tf.Session()  # start a session
sess.run(init)  # initialise the variables
lines = None  # handle of the prediction curve currently on the axes (none yet)
for i in range(1000):  # 1000 training steps
    sess.run(train_step, feed_dict={xs: x_data, ys: y_data})  # one gradient-descent step on the full data set
    if i % 50 == 0:  # refresh the plot every 50 steps
        # Remove the previously drawn curve through the artist itself. This
        # replaces a blanket try/except around `ax.lines.remove(...)`, which
        # hid every failure, including a failure to remove the old curve.
        if lines is not None:
            lines[0].remove()
        prediction_value = sess.run(prediction, feed_dict={xs: x_data})  # current network prediction
        print('loss=', sess.run(loss, feed_dict={xs: x_data, ys: y_data}))  # watch the loss decrease
        print('prediction=', sess.run(prediction, feed_dict={xs: [x_data[0, :]]}))  # prediction for the first sample
        print()  # print an empty line
        lines = ax.plot(x_data, prediction_value, 'r-', lw=5)  # draw the prediction as a red line
        plt.pause(.1)  # pause 0.1 s so the animation is visible
plt.ioff()  # leave interactive mode and draw once more so the window stays open
lines = ax.plot(x_data, prediction_value, 'r-', lw=5)
plt.show()
# Save the trained model with tf.train.Saver()
saver = tf.train.Saver()
save_path = saver.save(sess, "my_net/save_net.ckpt")  # save the model
print("Save to path: ", save_path)
print()
sess.close()  # close the session
# Restore the model in a fresh session and predict a value
with tf.Session() as sess2:
    saver.restore(sess2, "my_net/save_net.ckpt")  # load all variables of the saved model
    print(y_data[0, :])  # original target value
    print(sess2.run(prediction, feed_dict={xs: [x_data[0, :]]}))  # predicted value

View File

@@ -0,0 +1,359 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/703
"""
import pygame
import random
import math
import numpy as np
# Parameters
screen_width = 1500  # screen width
screen_height = 900  # screen height
map_width = screen_width*4  # map size (width)
map_height = screen_height*4  # map size (height)
number_enemy = map_width*map_height/500000  # target number of enemies
number_dots = map_width * map_height / 50  # target number of dots (food)
max_show_size = 100  # largest radius drawn on screen; beyond it the map is scaled down instead
my_value = 1000  # initial value of the player's ball
enemy_value_low = 500  # lowest initial enemy value
enemy_value_high = 1500  # highest initial enemy value
dot_value = 30  # value of one dot (food on the ground)
my_speed = 10  # normal speed of the player's ball
speed_up = 20  # speed of the player's ball while the mouse button is held
speed_enemy = 10  # normal enemy speed
speed_enemy_anomaly = 20  # enemy speed when it suddenly accelerates
anomaly_pro = 0.5  # probability that an enemy uses the accelerated speed
change_pro = 0.05  # probability that an enemy changes direction, i.e. roughly once every 1/change_pro updates
eat_percent = 0.9  # fraction of an eaten ball's value that is added to the eater (1 means 100%)
loss = 0.001  # proportional value decay; heavier balls lose more: at value 100000 the fraction lost per update equals `loss`
enemy_bigger_pro = 0.0005  # probability that an enemy gains enemy_bigger_rate times the player's value
enemy_bigger_rate = 0.1  # size of that gain as a multiple of the player's value
class Color(object):
    """Colour helpers, usable without creating an instance."""

    @classmethod
    def random_color(cls):
        """Return a random ``(red, green, blue)`` tuple, each channel in 0..255."""
        # Channels are drawn in red, green, blue order.
        return tuple(random.randint(0, 255) for _channel in range(3))
class Ball(object):
    """Base class for every ball: map position, velocity, colour and value."""

    def __init__(self, x, y, sx, sy, color, value):
        """Store the ball state; a new ball starts alive."""
        self.x, self.y = x, y  # position on the map
        self.sx, self.sy = sx, sy  # velocity components
        self.color = color  # RGB colour
        self.value = value  # the ball's value (its size, not its displayed size)
        self.is_alive = True  # balls are alive by default
class My_Ball(Ball):
    """The player's ball; it is always drawn at the centre of the screen."""

    def __init__(self, x, y, sx, sy, color, value):
        """Initialise the base state, then derive radius and screen position."""
        # A subclass that overrides __init__ must call the parent explicitly.
        super().__init__(x, y, sx, sy, color, value)
        self._refresh_display()

    def _refresh_size(self):
        """Recompute the radius from the value and cap the displayed radius."""
        self.radius = int(self.value ** 0.5)  # radius from value (the factor pi is ignored)
        self.show_radius = min(self.radius, max_show_size)

    def _refresh_display(self):
        """Recompute size and pin the screen position to the screen centre."""
        self._refresh_size()
        self.position_x = int(screen_width / 2)
        self.position_y = int(screen_height / 2)

    def draw(self, window):
        """Draw the ball on ``window`` after refreshing its size and position."""
        self._refresh_display()  # the value changes between frames
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)

    def eat_ball(self, other):
        """Eat ``other`` (a dot or an enemy) if it is close enough and smaller.

        ``other`` is eaten when both balls are alive, their on-screen distance
        is below this ball's displayed radius, and this ball is displayed
        larger (ties on displayed radius are broken by value).
        """
        if self != other and self.is_alive and other.is_alive:
            distance = ((self.position_x - other.position_x) ** 2
                        + (self.position_y - other.position_y) ** 2) ** 0.5
            bigger = self.show_radius > other.show_radius or (
                self.show_radius == other.show_radius and self.value > other.value)
            if distance < self.show_radius and bigger:
                other.is_alive = False  # the other ball is eaten
                self.value += other.value*eat_percent  # absorb part of its value
                self._refresh_size()

    def move(self):
        """Advance by the velocity and clamp the position to the map."""
        self.x = min(max(self.x + self.sx, 0), map_width)
        self.y = min(max(self.y + self.sy, 0), map_height)
class Enemy_Ball(Ball):
    """An enemy ball, displayed relative to the player's ball (``host_ball``)."""

    def __init__(self, x, y, sx, sy, color, value, host_ball):
        """Initialise the base state and compute the first on-screen projection."""
        super().__init__(x, y, sx, sy, color, value)
        self.host_ball = host_ball
        self._update_display()

    def _update_display(self):
        """Recompute radius, displayed radius and screen position.

        When the player's radius reaches ``max_show_size`` the whole map is
        scaled down by ``host_ball.radius / max_show_size``; the displayed
        radius of an enemy is then never smaller than 10.
        """
        self.radius = int(self.value ** 0.5)
        host = self.host_ball
        center_x = int(screen_width / 2)
        center_y = int(screen_height / 2)
        if host.radius >= max_show_size:
            zoom = host.radius / max_show_size
            self.show_radius = max(10, int(self.radius / zoom))
            self.position_x = int((self.x - host.x) / zoom) + center_x
            self.position_y = int((self.y - host.y) / zoom) + center_y
        else:
            self.show_radius = self.radius  # shown at true size
            self.position_x = (self.x - host.x) + center_x
            self.position_y = (self.y - host.y) + center_y

    def draw(self, window):
        """Draw the ball after re-projecting it against the player's current state."""
        self._update_display()
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)

    def eat_ball(self, other):
        """Eat ``other`` if it is close enough and smaller (same rule as the player)."""
        if self != other and self.is_alive and other.is_alive:
            distance = ((self.position_x - other.position_x) ** 2
                        + (self.position_y - other.position_y) ** 2) ** 0.5
            bigger = self.show_radius > other.show_radius or (
                self.show_radius == other.show_radius and self.value > other.value)
            if distance < self.show_radius and bigger:
                other.is_alive = False  # the other ball is eaten
                self.value += other.value*eat_percent
                # Only the radius is refreshed here; the displayed radius is
                # refreshed on the next draw().
                self.radius = int(self.value ** 0.5)

    def move(self):
        """Advance by the velocity; bounce off the map borders."""
        self.x += self.sx
        self.y += self.sy
        # Horizontal borders
        if self.x < 0:
            self.sx = -self.sx
            self.x = 0
        if self.x > map_width:
            self.sx = -self.sx
            self.x = map_width
        # Vertical borders
        if self.y <= 0:
            self.sy = -self.sy
            self.y = 0
        if self.y >= map_height:
            self.sy = -self.sy
            self.y = map_height
class Dot_Ball(Ball):
    """A food dot on the ground, displayed relative to the player's ball."""

    def __init__(self, x, y, sx, sy, color, value, host_ball):
        """Initialise the base state and compute the first on-screen projection."""
        super().__init__(x, y, sx, sy, color, value)
        self.host_ball = host_ball
        self.radius = 8  # fixed dot radius
        self._update_display()

    def _update_display(self):
        """Recompute the displayed radius and screen position.

        When the player's radius reaches ``max_show_size`` the map is scaled
        down by ``host_ball.radius / max_show_size``; the displayed radius of
        a dot is then never smaller than 3.
        """
        host = self.host_ball
        center_x = int(screen_width / 2)
        center_y = int(screen_height / 2)
        if host.radius >= max_show_size:
            zoom = host.radius / max_show_size
            self.show_radius = max(3, int(self.radius / zoom))
            self.position_x = int((self.x - host.x) / zoom) + center_x
            self.position_y = int((self.y - host.y) / zoom) + center_y
        else:
            self.show_radius = self.radius
            self.position_x = (self.x - host.x) + center_x
            self.position_y = (self.y - host.y) + center_y

    def draw(self, window):
        """Draw the dot after re-projecting it against the player's current state."""
        self._update_display()
        pygame.draw.circle(window, self.color, (self.position_x, self.position_y), self.show_radius)
def creat_my_ball():
    """Create the player's ball: white, at rest, at a random map position."""
    start_x = random.randint(0, map_width)
    start_y = random.randint(0, map_height)
    white = (255, 255, 255)
    return My_Ball(start_x, start_y, 0, 0, white, my_value)
def auto_creat_ball(balls, host_ball):
    """Append one new random enemy to ``balls`` unless enough enemies exist."""
    if len(balls) > number_enemy:
        return  # enough enemies already
    x = random.randint(0, map_width)  # random map position
    y = random.randint(0, map_height)
    value = random.randint(enemy_value_low, enemy_value_high)  # initial enemy value
    sx = random.randint(-speed_enemy, speed_enemy)  # horizontal speed
    # The vertical speed completes the speed vector; its sign is random.
    sign = 1 if random.randint(0, 1) == 0 else -1
    sy = sign * int((speed_enemy ** 2 - sx ** 2) ** 0.5)
    color = Color.random_color()  # random enemy colour
    balls.append(Enemy_Ball(x, y, sx, sy, color, value, host_ball))
def auto_creat_dots(dots, host_ball):
    """Append one new motionless dot to ``dots`` unless enough dots exist."""
    if len(dots) > number_dots:
        return  # enough dots already
    x = random.randint(0, map_width)  # random map position
    y = random.randint(0, map_height)
    color = Color.random_color()
    dots.append(Dot_Ball(x, y, 0, 0, color, dot_value, host_ball))
def control_my_ball(host_ball):
    """Move the player's ball, apply value decay and steer it with the mouse.

    The ball heads towards the mouse pointer: at ``speed_up`` while the left
    button is held, at ``my_speed`` otherwise. It stops when the pointer is
    within 30 pixels of the screen centre on both axes. Any event that is not
    a mouse event also stops the ball.

    Fixes compared with the previous version:
    * the vertical-axis branch referenced an undefined name (``host.ball``)
      and compared ``pos[1]`` with 0 instead of the screen centre, so the
      ball could never move up along that axis; the direction is now taken
      from ``math.atan2`` for every pointer position;
    * QUIT events are put back on the queue instead of being consumed here,
      so the main loop still sees them.
    """
    host_ball.move()
    host_ball.value = host_ball.value*(1-loss*host_ball.value/100000)
    center_x = screen_width/2
    center_y = screen_height/2
    for event in pygame.event.get():  # watch the mouse events
        if event.type == pygame.QUIT:
            pygame.event.post(event)  # leave it for the main loop
            continue
        if event.type == pygame.MOUSEBUTTONDOWN:
            pos = event.pos
            speed = speed_up
        elif event.type == pygame.MOUSEMOTION:
            pos = event.pos
            speed = speed_up if event.buttons[0] == 1 else my_speed
        elif event.type == pygame.MOUSEBUTTONUP:
            pos = event.pos
            speed = my_speed
        else:
            pos = [center_x, center_y]
            speed = my_speed
        dx = pos[0] - center_x
        dy = pos[1] - center_y
        if abs(dx) < 30 and abs(dy) < 30:
            # Pointer (almost) on the ball: stand still.
            host_ball.sx = 0
            host_ball.sy = 0
        else:
            # atan2 handles all four quadrants and the vertical axis (dx == 0).
            angle = math.atan2(dy, dx)
            host_ball.sx = int(speed * math.cos(angle))
            host_ball.sy = int(speed * math.sin(angle))
def enemy_move(balls, host_ball):
    """Advance every enemy one step with random growth and direction changes."""
    for enemy in balls:
        enemy.move()
        enemy.value *= 1 - loss * enemy.value / 100000  # proportional decay
        # Rare growth by a fraction of the player's value.
        grows = random.randint(1, int(1/enemy_bigger_pro)) == 1
        if grows:
            enemy.value += host_ball.value*enemy_bigger_rate
        # Pick the speed magnitude: anomalous (fast) or normal.
        is_fast = random.randint(1, int(1/anomaly_pro)) == 1
        top_speed = speed_enemy_anomaly if is_fast else speed_enemy
        # With a certain probability pick a new direction.
        if random.randint(1, int(1/change_pro)) != 1:
            continue
        enemy.sx = random.randint(-top_speed, top_speed)
        vertical = int((top_speed ** 2 - enemy.sx ** 2) ** 0.5)
        enemy.sy = vertical if random.randint(0, 1) == 0 else -vertical
def eat_each_other(host_ball, balls, dots):
    """Run one round of eating between enemies, dots and the player."""
    # Each enemy first tries the other enemies, then the dots.
    for hunter in balls:
        for prey in [*balls, *dots]:
            hunter.eat_ball(prey)
    # Player against each enemy, both directions.
    for enemy in balls:
        host_ball.eat_ball(enemy)
        enemy.eat_ball(host_ball)
    # Player eats dots.
    for food in dots:
        host_ball.eat_ball(food)
def paint(host_ball, balls, dots, screen):
    """Clear the screen, drop dead balls from both lists and draw the rest.

    ``balls`` and ``dots`` are pruned in place, so the caller's lists lose
    their dead entries. The previous version removed items from a list while
    iterating over it, which skipped the element following each removed one:
    that element was neither drawn nor checked in that frame.

    Args:
        host_ball: the player's ball; drawn only while alive.
        balls: list of enemy balls.
        dots: list of food dots.
        screen: the surface to draw on.
    """
    screen.fill((0, 0, 0))  # black background
    if host_ball.is_alive:
        host_ball.draw(screen)
    for group in (balls, dots):
        group[:] = [ball for ball in group if ball.is_alive]  # in-place prune
        for ball in group:
            ball.draw(screen)
def main():
    """Run the game loop until the window is closed."""
    pygame.init()
    try:
        screen = pygame.display.set_mode((screen_width, screen_height))  # create the window
        pygame.display.set_caption("球球大作战")  # window title
        balls = []  # all enemy balls
        dots = []  # all food dots
        is_running = True
        host_ball = creat_my_ball()  # the player's ball
        frame = 0  # frame counter, used for the periodic value print-out
        while is_running:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    is_running = False
            auto_creat_dots(dots, host_ball)  # spawn a dot if needed
            auto_creat_ball(balls, host_ball)  # spawn an enemy if needed
            paint(host_ball, balls, dots, screen)  # draw everything
            pygame.display.flip()  # present the frame
            pygame.time.delay(30)  # frame delay in milliseconds
            control_my_ball(host_ball)  # move the player's ball
            enemy_move(balls, host_ball)  # enemies move randomly
            eat_each_other(host_ball, balls, dots)  # resolve eating
            frame += 1
            if frame % 50 == 0:
                print(host_ball.value)
    finally:
        pygame.quit()  # always release the pygame modules on exit


if __name__ == '__main__':
    main()

View File

@@ -0,0 +1,106 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/706
"""
import numpy as np
import time
import matplotlib.pyplot as plt
import tushare as ts
def main():
    """Download one stock's daily prices, back-test the strategy, print and plot."""
    t_start = time.perf_counter()
    pro = ts.pro_api('到官网上注册寻找Token填在这里!')
    print('\n我的策略:见好就收,遇低抄底。\n'
          ' 【卖出】买入后涨了5%就卖出\n'
          ' 【买入】卖出后跌了5%就买入\n'
          '注:第一天必须买进,最后一天前必须卖出(为了与不操作的做对比)\n')
    number = 1
    for _run in range(number):
        # List of all listed stocks
        data = pro.stock_basic(exchange='', list_status='L',
                               fields='ts_code,symbol,name,area,industry,list_date')
        row = 1  # use the second row of the stock list
        ts_code = data.values[row, 0]  # stock code
        stock = data.values[row, 2]  # stock name
        industry = data.values[row, 4]  # industry
        start_date = '20110101'  # start of the test period
        end_date = '20191027'  # end of the test period
        df = pro.daily(ts_code=ts_code, start_date=start_date, end_date=end_date)  # daily bars
        # Columns 5 and 8 are reversed so they run from past to present.
        close = np.array(list(reversed(df.values[:, 5])))  # closing prices
        pct_chg = np.array(list(reversed(df.values[:, 8])))  # daily change in percent
        # back_test returns: profit, buy-and-hold profit, number of trades,
        # invested amount, buy days, sell days
        result = back_test(close.shape[0], close, pct_chg)
        profit, profit_no_operation, times, invest_money, buy_time_all, sell_time_all = result
        print('\n------股票:', stock, ts_code, industry, '[买入市值=%7.2f' % invest_money, ']------')
        print('回测时间段:', start_date, '-', end_date)
        print('操作后利润= %6.2f' % profit, ' 买入(卖出)次数=', times, ' ')
        print('不操作利润= %6.2f' % profit_no_operation, '(第一天买入,最后一天卖出,中间未操作)')
    t_end = time.perf_counter()
    print('CPU执行时间=', t_end - t_start, 's')
    plt.figure(1)
    plt.title('Stock Code: '+ts_code+' (red point: buy, green point: sell)')
    plt.grid()
    plt.plot(range(close.shape[0]), close, '-')
    for day in buy_time_all:
        plt.plot(day, close[int(day)], 'or', markersize=13)  # red: buy
    for day in sell_time_all:
        plt.plot(day, close[int(day)], 'dg', markersize=13)  # green: sell
    plt.show()
def back_test(days, close, pct_chg, money_in=10000):
    """Back-test the "sell at +5%, buy back at -5%" strategy on past prices.

    Rules: buy on day 0; while holding, sell once the close is more than 5%
    above the purchase price; while out of the market, buy once the close is
    more than 5% below the last selling price; a position still open after
    the last day is sold at the last close.

    Fixes compared with the previous version:
    * the profit of the final forced sale is multiplied by the number of
      shares, like every other sale;
    * the final holding period is measured from the last purchase to the last
      day (it used to read ``sell_time``, which is unset if no sale happened);
    * trade days are collected in lists instead of repeated ``np.append``.

    Args:
        days: number of trading days to simulate (normally ``len(close)``).
        close: NumPy array of closing prices, oldest first.
        pct_chg: daily percentage changes; accepted but not used.
        money_in: amount of money available for the first purchase.

    Returns:
        ``(total_profit, profit_no_operation, times, invest_money,
        buy_time_all, sell_time_all)``: strategy profit, buy-and-hold profit,
        number of completed buy/sell round trips, money actually invested,
        and float arrays with the buy and sell day indices.
    """
    money_in_amount = int(money_in/close[0])  # number of shares bought
    invest_money = close[0]*money_in_amount  # money actually spent on shares
    profit_no_operation = (close[close.shape[0]-1]-close[0])*money_in_amount  # buy-and-hold profit
    position = -1  # 1: holding, 0: sold, -1: nothing done yet
    total_profit = 0
    times = 0
    buy_time = None
    sell_time = None
    buy_days = []
    sell_days = []
    for i in range(days):
        if i == 0:
            # Day 0: always buy, so the result is comparable with buy-and-hold.
            buy_time = i
            buy_days.append(buy_time)
            position = 1
            print('------------------第', buy_time, '天买进-------------')
        elif position == 0:
            # Out of the market: buy back after a drop of more than 5%.
            current_sell_pct = (close[i]-close[sell_time])/close[sell_time]*100
            if current_sell_pct < -5:
                buy_time = i
                buy_days.append(buy_time)
                print('------------------第', buy_time, '天买进-------------')
                position = 1
        elif position == 1:
            # Holding: sell after a rise of more than 5%.
            current_buy_pct = (close[i]-close[buy_time])/close[buy_time]*100
            if current_buy_pct > 5:
                sell_time = i
                sell_days.append(sell_time)
                print('----------第', sell_time, '天卖出,持有天数:', sell_time-buy_time, '--------------\n')
                position = 0
                profit = close[sell_time]-close[buy_time]  # profit per share
                times = times + 1
                total_profit = total_profit + profit*money_in_amount
    if position == 1:
        # Still holding after the last day: sell at the last close.
        last_day = days - 1
        profit = close[last_day]-close[buy_time]  # profit per share
        total_profit = total_profit + profit*money_in_amount
        times = times + 1
        print('--------------第', last_day, '天(最后一天)卖出,持有天数:', last_day-buy_time, '--------------\n')
        sell_days.append(last_day)
    buy_time_all = np.array(buy_days, dtype=float)
    sell_time_all = np.array(sell_days, dtype=float)
    return total_profit, profit_no_operation, times, invest_money, buy_time_all, sell_time_all
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,41 @@
% This code is supported by the website: https://www.guanjihuan.com
% The newest version of this code is on the web page: https://www.guanjihuan.com/archives/766
% MATLAB basics. A percent sign starts a comment.
% NOTE(review): the original comment here mentioned Ctrl+R / Ctrl+T, presumably the editor shortcuts for commenting / uncommenting lines - confirm.
clc; % clc clears the command window
clear all; % clear all removes all variables from the workspace
clf; % clf clears the current figure
aa=1 % no trailing semicolon: the result is echoed in the command window
bb=2; % a trailing semicolon suppresses the output
cc1=zeros(2,3) % zeros() creates a 2-by-3 matrix of zeros
cc2=eye(3,3) % eye() creates a 3-by-3 identity matrix
% Matrix products
matrix1=[3,3;3,3] % matrix 1: commas separate columns, semicolons separate rows
matrix2=[2,0;0,2]
matrix_product_1=matrix1*matrix2 % * is the matrix product
matrix_product_2=matrix1.*matrix2 % .* is the element-wise product
% Loops
for i0=1:0.5:2 % a for loop is closed by end; a:b:c runs from a to c in steps of b
for_result=i0+1i % 1i is the imaginary unit in MATLAB
end
% Conditionals
if aa~=1 % in MATLAB ~= means "not equal" and == means "equal"
dd=100
else
dd=300
end
matrix=[2,3;5,7]
% Eigenvectors and eigenvalues
[V,D]=eig(matrix) % eig returns the eigenvectors as the columns of V and the eigenvalues on the diagonal of D
% Matrix inverse
inv1=inv(matrix) % inverse via inv()
inv2=matrix^-1 % inverse via the power operator
% Plotting
plot([0:20],[10:-1:-10],'-o') % line plot with circle markers

View File

@@ -0,0 +1,164 @@
! This code is supported by the website: https://www.guanjihuan.com
! The newest version of this code is on the web page: https://www.guanjihuan.com/archives/762
module global ! A module bundles related variables and procedures; pull them in with "use global" where they are needed instead of relying on global variables.
    implicit none
    double precision, parameter :: sqrt3 = 1.7320508075688773d0 ! "parameter" marks a named constant that cannot be changed
    double precision, parameter :: Pi = 3.14159265358979324d0
end module global
program main ! The main program starts with "program" and ends with "end program". Fortran is case-insensitive; "!" starts a comment.
    use global
    use f95_precision ! NOTE(review): the original author was unsure when this module is required; reportedly the program also runs without it
    use blas95 ! provides gemm() for matrix products, among others
    use lapack95 ! provides getrf/getri for matrix inversion and geev for eigenvalues and eigenvectors
    implicit none ! turns off implicit typing by first letter, so every variable must be declared
    integer i,j,info,index1(2) ! integers
    double precision a(2,2),b(2,2),c(2,2),& ! long statements continue with "&"; a leading "&" on the next line is optional
        x1, x2, result_1, result_2, fun1 ! double-precision reals
    complex*16 dd(2,2), eigenvalues(2) ! complex numbers
    complex*16, allocatable:: eigenvectors(:,:) ! allocatable array; the "::" is mandatory here and optional in the declarations above
    character(len=15) hello, number ! strings; without len only a single character is reserved

    allocate(eigenvectors(2,2)) ! allocate the storage

    write(*,*) '----输出----'
    hello='hello world'
    write(*,*) hello ! the first * is the output unit (screen), the second * is the default format
    write(number,'(f7.3)') pi ! an internal write converts a number to a string; '(f7.3)' is a real format, integers use e.g. '(i3)'
    write(*,*) '数字转成字符串后再输出:', number
    write(*,"(a,18x)",advance="no") hello ! advance='no' suppresses the line break and requires an explicit format; 'a' prints the string, '18x' inserts 18 blanks
    write(*,*) number,'这是不换行输出测试'
    write(*,"('一些固定文字也可以写在这里面', a, a,//)") hello, number ! literal text may be part of the format; use double quotes outside when single quotes appear inside
    ! 'a' uses the actual length of the character variable (a15 etc. also works); each "/" adds one more line break

    write(*,*) '----写入文件----'
    open(unit=10,file='learn-fortran-test.txt') ! open a file
    write(10,*) hello, number
    close(10) ! close the file
    write(*,*) ''

    write(*,*) '----矩阵乘积----'
    a(1,1)=2;a(1,2)=5;a(2,1)=3;a(2,2)=2 ! several statements on one line are separated by semicolons
    b=0.d0 ! zero the whole matrix first: only the diagonal is set below, and the off-diagonal elements were previously left undefined
    b(1,1)=3;b(2,2)=3
    write(*,*) '矩阵直接默认输出,是按列的顺序一个个输出'
    write(*,*) 'a='
    write(*,*) a
    write(*,*) '矩阵格式化输出'
    write(*,*) 'a='
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') a(i,j) ! the inner loop runs over the column index
        enddo
        write(*,*) ''
    enddo
    write(*,*) 'b='
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') b(i,j) ! the inner loop runs over the column index
        enddo
        write(*,*) ''
    enddo
    call gemm(a,b,c) ! matrix product c = a*b
    write(*,*) '矩阵乘积c=a*b='
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') c(i,j) ! the inner loop runs over the column index
        enddo
        write(*,*) ''
    enddo
    write(*,*) ''

    write(*,*) '----矩阵求逆----'
    call getrf(a,index1,info); call getri(a,index1,info) ! getrf and getri are used together to invert a matrix
    ! info must be an integer; info = 0 means the call succeeded.
    ! index1 is produced by getrf and consumed by getri; it is an integer vector whose length is the matrix dimension.
    ! After these calls "a" holds the inverse, not the original matrix.
    do i=1,2
        do j=1,2
            write(*,'(f10.4)',advance='no') a(i,j) ! the inner loop runs over the column index
        enddo
        write(*,*) ''
    enddo

    write(*,*) '----复数矩阵----'
    dd(1,1)=(1.d0, 0.d0)
    dd(1,2)=(7.d0, 0.d0)
    dd(2,1)=(3.d0, 0.d0)
    dd(2,2)=(2.d0, 0.d0)
    do i=1,2
        do j=1,2
            write(*,"(f10.4, '+1i*',f7.4)",advance='no') dd(i,j) ! the inner loop runs over the column index
        enddo
        write(*,*) ''
    enddo
    write(*,*) ''

    write(*,*) '----矩阵本征矢和本征值----'
    call geev(A=dd, W=eigenvalues, VR=eigenvectors, INFO=info)
    ! A is best passed as a complex matrix; W (eigenvalues, 1-D) and VR (eigenvectors, 2-D) are complex; INFO is an integer.
    ! Note: dd is overwritten by the call and no longer holds the original matrix.
    write(*,*) 'eigenvectors:'
    do i=1,2
        do j=1,2
            write(*,"(f10.4, '+1i*',f7.4)",advance='no') eigenvectors(i,j) ! inner loop over columns; each printed column is one eigenvector
        enddo
        write(*,*) ''
    enddo
    write(*,*) 'eigenvalues:'
    do i=1,2
        write(*,"(f10.4, '+1i*',f7.4)",advance='no') eigenvalues(i)
    enddo
    write(*,*) ''
    deallocate(eigenvectors) ! release the allocatable array
    write(*,*) '' ! print an empty line

    write(*,*) '----循环加判断----'
    do i=1,5 ! loops run from "do" to "enddo"
        if (mod(i,2)==0) then ! conditionals use if (...) then
            write(*,*) '我是偶数', i
        else if (i==3) then
            write(*,*) '我是第3个数字也是奇数'
        else
            write(*,*) '我是奇数', i
        endif
    enddo
    write(*,*) ''

    call sub1(2.d0, 3.d0, result_1, result_2) ! write 2.d0 (or 2.0d0): the subroutine expects double precision, so 2 or 2.0 would be a type mismatch
    write(*,*) '调用子程序,求和:',result_1
    write(*,*) '调用子程序,乘积:',result_2
    write(*,*) '使用函数,返回减法结果:', fun1(2.d0, 3.d0)
    write(*,*) ''
end program
subroutine sub1(x1,x2,y1,y2) ! A subroutine: inputs and outputs are all in the argument list; invoke it with "call".
    implicit none ! added: every name in this unit must be declared explicitly
    double precision,intent(in):: x1, x2 ! the "::" is mandatory when attributes such as intent are given
    double precision,intent(out):: y1, y2
    ! intent(in) marks an input, intent(out) an output, intent(inout) an argument used in both directions.
    ! intent is optional but recommended: it documents the data flow, and assigning to an intent(in) argument is reported as an error.
    y1=x1+x2 ! sum
    y2=x1*x2 ! product
end subroutine
function fun1(x1,x2) ! A function returns a single value; a subroutine can return several, which is why subroutines are used more often.
    implicit none ! added: every name in this unit must be declared explicitly
    double precision, intent(in) :: x1, x2 ! inputs are read only
    double precision fun1 ! the function name (the result variable) needs a type
    fun1=x1-x2 ! the result variable has the same name as the function
    return ! optional here; "return" leaves the function immediately and is typically combined with an if
end function

View File

@@ -0,0 +1,78 @@
! This code is supported by the website: https://www.guanjihuan.com
! The newest version of this code is on the web page: https://www.guanjihuan.com/archives/764
program hello_open_mp
    use omp_lib ! alternatively: include 'omp_lib.h'
    implicit none ! added: all variables below are declared explicitly
    integer mcpu,tid,total,N,i,j,loop
    double precision starttime, endtime, time,result_0
    double precision, allocatable:: T(:)

    N=5 ! number of iterations of the parallel do loop
    loop=1000000000 ! amount of work per iteration; raise it to compare parallel and serial run times
    allocate(T(N))

    !call OMP_SET_NUM_THREADS(2) ! uncomment to set the number of threads by hand
    total=OMP_GET_NUM_PROCS() ! number of processors of the machine
    print '(a,i2)', '计算机处理器数量:' , total ! write(*,'(a,i2)') would work as well

    print '(a)', '-----在并行之前-----'
    tid=OMP_GET_THREAD_NUM() ! number of the current thread
    mcpu=OMP_GET_NUM_THREADS() ! total number of threads
    print '(a,i2,a,i2)', '当前线程号:',tid,';总的线程数:', mcpu
    print * ! empty line

    print'(a)','-----第一部分程序开始并行-----'
    !$OMP PARALLEL DEFAULT(PRIVATE) ! DEFAULT(PRIVATE) is used here
    tid=OMP_GET_THREAD_NUM() ! number of the current thread
    mcpu=OMP_GET_NUM_THREADS() ! total number of threads
    print '(a,i2,a,i2)', '当前线程号:',tid,';总的线程数:', mcpu
    !$OMP END PARALLEL
    print * ! empty line

    print'(a)','-----第二部分程序开始并行-----'
    starttime=OMP_GET_WTIME() ! start time
    !$OMP PARALLEL DO DEFAULT(PRIVATE) SHARED(T,N,loop) ! private by default; the inputs and the result array are shared
    do i=1,N ! the parallel loop, e.g. over several samples
        result_0=0
        tid=OMP_GET_THREAD_NUM() ! number of the current thread
        mcpu=OMP_GET_NUM_THREADS() ! total number of threads
        do j=1,loop ! stands for the actual computation
            result_0 = result_0+1
        enddo
        T(i) = result_0-loop+i ! store the result of this iteration in the shared array
        ! i identifies the iteration, so the results can be sorted out afterwards
        print '(a,i2, a, f10.4,a,i2,a,i2 )', 'T(',i,')=', T(i) , ' 来源于线程号',tid,';总的线程数:', mcpu
    enddo
    !$OMP END PARALLEL DO ! end of the parallel region
    endtime=OMP_GET_WTIME() ! end time
    time=endtime-starttime ! elapsed time
    print '(a, f13.5)' , '第二部分程序按并行计算所用的时间:', time
    print * ! empty line

    print'(a)','-----第二部分程序按串行的计算-----'
    starttime=OMP_GET_WTIME() ! start time
    do i=1,N
        result_0=0
        tid=OMP_GET_THREAD_NUM() ! number of the current thread
        mcpu=OMP_GET_NUM_THREADS() ! total number of threads
        do j=1,loop
            result_0 = result_0+1
        enddo
        T(i) = result_0-loop+i
        print '(a,i2, a, f10.4,a,i2,a,i2 )', 'T(' ,i,')=', T(i) , ' 来源于线程号',tid,';总的线程数:', mcpu
    enddo
    endtime=OMP_GET_WTIME() ! end time
    time=endtime-starttime ! elapsed time
    print '(a, f13.5)' , '第二部分程序按串行计算所用的时间:', time
    print * ! empty line

    tid=OMP_GET_THREAD_NUM() ! number of the current thread
    mcpu=OMP_GET_NUM_THREADS() ! total number of threads
    print '(a,i5,a,i5)', '当前线程号:',tid,';总的线程数:', mcpu
    print * ! empty line

    deallocate(T) ! added: release the result array
end program hello_open_mp ! "end" or "end program" alone would also be accepted

View File

@@ -0,0 +1,33 @@
# 一级标题
## 二级标题
### 三级标题
#### 四级标题
有序列表:数字加一个点
1. 列表内容
2. 列表内容
3. 列表内容
无序列表:用 + - * 任何一种都可以(为了不和其他记号重复,个人倾向于用+)
+ 列表内容
    + 嵌套前面加几个空格(为了保险起见,个人倾向于用四个空格)
+ 列表内容
    + 列表嵌套
        + 列表嵌套
            + 列表嵌套
分割线:三个或者三个以上的 - 或 *(为了不和其他记号重复,个人倾向于用---)

----
*倾斜:前后一个星号*
**加粗:前后两个星号**
***斜体加粗:前后三个星号***
| 表头 | 表头 | 表头 |
| --- | --- | --- |
| 内容 | 内容 | 内容 |
| 内容 | 内容 | 内容 |
| 内容 | 内容 | 内容 |

View File

@@ -0,0 +1,6 @@
\documentclass{article} %文档类声明
%导言区(文档类声明和正文间的是导言区)
\begin{document} %正文开始
Hello, world! %正文
\end{document} %正文结束

View File

@@ -0,0 +1,18 @@
\documentclass{article} %文档类声明
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
\begin{document} %正文
\title{这是一个标题} %标题
\author{作者名字} %作者
\date{} %\maketitle默认会加上当前时间用\date{},空着内容可以取消时间的显示
\maketitle %加了这个,标题、作者等信息才会显示
\section{}
\subsection{小节}
\subsubsection{子小节}
Hello, world! %下面空一行代表换行
\textbf{\textbackslash textbf\{\}可以加粗文本} %\textbf{}可以加粗文本
\section{}
\part{}
\part{}
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@@ -0,0 +1,50 @@
\documentclass{beamer}
\usepackage{ctex} %一个支持中文宏包,如果不用中文无法显示
\usepackage{graphicx} %这个包提供\includegraphics命令来插入图片
\usetheme{Boadilla} %主题
\usecolortheme{default} %主题的颜色
\title{这是PPT标题} %标题
\author{作者名字\inst{1},作者名字\inst{2}} %作者
\institute{\inst{1}第一个单位\and\inst{2}第二个单位} %这里的\and有换行的效果
\date{\today} %时间(默认也会显示)
\logo{\includegraphics[height=1.0cm]{1.jpg}} %右下角的小logo
\begin{document} %正文开始
\begin{frame} %相当于ppt里的一页
\titlepage %标题页
\end{frame}
\begin{frame}
\frametitle{目录} %当前页的标题
\tableofcontents %制作目录,需要\section{}配合
\end{frame}
\section{第一节} %用来做目录
\begin{frame}
\frametitle{当前页的标题1}
这是第一节第一页的内容。This is a text in the first frame. This is a text in the first frame. This is a text in the first frame.
\end{frame}
\section{第二节}
\begin{frame}
\frametitle{当前页的标题2}
这是第二节第一页的内容。这里使用了\textbackslash pause。\pause This is a text in the second frame. This is a text in the second frame. This is a text in the second frame. %\pause是暂停前后会分成两页。
\end{frame}
\begin{frame}
\frametitle{当前页的标题3Two-column slide}
\begin{columns} %分成列
\column{0.5\textwidth} %占用一半
This is a text in first column.
$$E=mc^2$$
\begin{itemize} %制作列表
\item First item
\item Second item
\end{itemize}
\column{0.5\textwidth} %占用一半
This text will be in the second column
and on second thought this is a nice looking
layout in some cases.
\end{columns}
\end{frame}
\end{document}

View File

@@ -0,0 +1,41 @@
from multiprocessing import Process
import os
import time
def run_proc(name, loops=300000000):  # code executed by each job
    """Run a CPU-bound busy loop, then print the process id and elapsed time.

    Args:
        name: Label of the job; only used in the printed message.
        loops: Number of busy-loop iterations. Defaults to the original
            hard-coded 300000000 so existing callers behave the same.
    """
    start_time = time.perf_counter()
    for i in range(loops):
        # NOTE: `^` is bitwise XOR in Python, not exponentiation. The value is
        # never used; the statement only keeps the CPU busy.
        x = 100000^1000000000000
    end_time = time.perf_counter()
    print ('Process id running on %s = %s' % (name, os.getpid()), '; running time = %s' % (end_time-start_time))
if __name__ == '__main__':
    job_names = ['job1', 'job2', 'job3', 'job4']
    # Serial run: the jobs execute one after another in this process
    print('串行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    for job_name in job_names:
        run_proc(job_name)
    end_time = time.perf_counter()
    print('CPU执行时间(s)=', (end_time-start_time), '\n')
    # Parallel run: one child process per job
    print('并行程序')
    print('Process id = %s.' % os.getpid())
    start_time = time.perf_counter()
    processes = [Process(target=run_proc, args=(job_name,)) for job_name in job_names]
    for p in processes:
        p.start()
    # Wait for every child, not only the last one started; otherwise the
    # timing below can be taken while earlier jobs are still running.
    for p in processes:
        p.join()
    end_time = time.perf_counter()
    print('CPU执行时间(s)=', (end_time-start_time))

View File

@@ -0,0 +1,47 @@
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Simplest case: a plain HTML page
with urlopen("https://mofanpy.com/static/scraping/basic-structure.html") as response:  # the response is closed once it has been read
    html = response.read().decode('utf-8')
print('\n显示网页的代码信息1\n\n ----------------开始----------------\n', html, '\n\n----------------结束----------------')  # show the page source
soup = BeautifulSoup(html, features='lxml')  # parse the page with BeautifulSoup
print('\n获取标签_标题h1_中的内容soup.h1\n', soup.h1)
print('\n获取标签_段落p_中的内容soup.p\n', soup.p)
print('\n获取标签_链接a_中的内容soup.a\n', soup.a)
all_href = soup.find_all('a')
print('\n获取所有"a标签"的内容soup.find_all(a)\n', all_href)
print('\n获取某个字典的值_1')
for a in all_href:
    print(a)
    print(a['href'])
all_href = [a['href'] for a in all_href]  # from here on all_href holds the href strings, not the tags
print('\n获取某个字典的值_2\n', all_href, '\n')
# Page whose elements carry CSS classes
with urlopen("https://mofanpy.com/static/scraping/list.html") as response:
    html = response.read().decode('utf-8')
print('\n显示网页的代码信息2\n\n ----------------开始----------------\n', html, '\n\n----------------结束----------------')  # show the page source
soup = BeautifulSoup(html, features='lxml')  # parse the page with BeautifulSoup
print('\n利用class筛选出所需要的信息')
month = soup.find_all('li', {"class": "month"})
print(month, '\n')
print('只显示文本:')
for m in month:
    print(m.get_text())
print('\n 多次筛选:')
january = soup.find('ul', {"class": 'jan'})
print(january, '\n')
d_january = january.find_all('li') # use january as a parent
print(d_january, '\n')
for d in d_january:
    print(d.get_text())

View File

@@ -0,0 +1,45 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6846
"""
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re # 正则模块
import requests
import os
# os.chdir('D:') # 设置文件保存的位置
# Input: collect up to 10 DOIs / links / titles from the user.
# NOTE(review): int(continue_or_not) raises ValueError on non-numeric input.
address_array = []
for i in range(10): # at most 10 papers per run
address = input('\n输入DOI/链接/标题:')
address_array.append(address)
continue_or_not = input('\n继续添加1/不继续添加0')
if int(continue_or_not) == 0:
break
# Download: post each address to the site, read the PDF URL from the first
# <iframe> of the returned page and stream the PDF into the current directory.
# NOTE(review): soup.iframe is None when the page has no <iframe>, so the
# subscript below raises TypeError in that case.
# NOTE(review): check that fetching from this site is legally permitted for you.
for address in address_array:
r = requests.post('https://sci-hub.st/', data={'request': address})
print('\n响应结果是:', r)
print('访问的地址是:', r.url)
soup = BeautifulSoup(r.text, features='lxml')
pdf_URL = soup.iframe['src']
# Prefix "https:" when the URL does not already start with it
if re.search(re.compile('^https:'), pdf_URL):
pass
else:
pdf_URL = 'https:'+pdf_URL
print('PDF的地址是', pdf_URL)
# File name: the URL is reversed so the non-greedy match stops at the last '/'
# before "pdf"; the match is reversed back and the leading '/' is dropped.
name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
print('PDF文件名是', name)
print('保存的位置在:', os.getcwd())
print('\n正在下载')
r = requests.get(pdf_URL, stream=True)
with open(name, 'wb') as f:
for chunk in r.iter_content(chunk_size=32):
f.write(chunk)
print('下载完成!')
print('\n全部下载完成!')

View File

@@ -0,0 +1,77 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
"""
import PyPDF2
import os
import re
from bs4 import BeautifulSoup
from urllib.request import urlopen
import requests
def main():
    """Ask for a PDF name, extract its DOI links and download the references."""
    os.chdir('D:/')  # directory containing the PDF file
    filename = input('输入PDF文件名')
    # The context manager closes the PDF even if link extraction raises.
    with open(filename+'.pdf','rb') as pdfFile:
        links = all_links_in_pdf(pdfFile)  # DOI links found in the PDF
    os.chdir('D:/Reference')  # directory where the references are saved
    download(links)  # download the references
def all_links_in_pdf(pdfFile):
    """Collect the DOI links stored in the annotations of a PDF.

    Args:
        pdfFile: Binary file object of the PDF, handed to PyPDF2.PdfFileReader.

    Returns:
        List of URIs matching '^https://doi.org', in page order. Each kept
        link is also printed with a running index. A URI equal to the
        previously kept one on the same page is skipped.
    """
    doi_pattern = re.compile('^https://doi.org')  # compiled once, not per annotation
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    i0 = 0
    links = []
    print()
    for page in range(pages):
        pageObject = pdfReader.getPage(page).getObject()
        if '/Annots' not in pageObject.keys():
            continue
        old = ''  # last link kept on this page
        for a in pageObject['/Annots']:
            u = a.getObject()
            # Skip annotations without an action, and actions without a /URI
            # entry; indexing '/URI' unconditionally raised KeyError on them.
            if '/A' not in u.keys() or '/URI' not in u['/A']:
                continue
            uri = u['/A']['/URI']
            if not doi_pattern.search(uri):  # ignore other kinds of links
                continue
            if uri != old:  # ignore a repeat of the previous link
                print(i0 , uri)
                links.append(uri)
                i0 += 1
                old = uri
    return links
# Download selected references: post each link to the site, read the PDF URL
# from the first <iframe> of the returned page and stream the PDF into the
# current working directory.
# NOTE(review): the hard-coded indices raise IndexError when links has fewer
# than 4 entries; soup.iframe is None when the page has no <iframe>.
# NOTE(review): check that fetching from this site is legally permitted for you.
def download(links):
for i0 in [0, 1, 3]: # only the selected references; to download all use: for i0 in range(len(links)): (links is a list and has no .shape)
address = links[i0]
r = requests.post('https://sci-hub.st/', data={'request': address})
print('\n响应结果是:', r)
print('访问的地址是:', r.url)
soup = BeautifulSoup(r.text, features='lxml')
pdf_URL = soup.iframe['src']
# Prefix "https:" when the URL does not already start with it
if re.search(re.compile('^https:'), pdf_URL):
pass
else:
pdf_URL = 'https:'+pdf_URL
print('PDF的地址是', pdf_URL)
# File name: match on the reversed URL so the non-greedy pattern stops at the
# last '/' before "pdf", reverse the match back and drop the leading '/'.
name = re.search(re.compile('fdp.*?/'),pdf_URL[::-1]).group()[::-1][1::]
print('PDF文件名是', name)
print('保存的位置在:', os.getcwd())
print('\n正在下载第',i0,'')
r = requests.get(pdf_URL, stream=True)
with open(name, 'wb') as f:
for chunk in r.iter_content(chunk_size=32):
f.write(chunk)
print('',i0,'篇下载完成!')
print('\n全部下载完成!')
if __name__ == '__main__':
main()

View File

@@ -0,0 +1,30 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/6869
"""
import PyPDF2
import os
import re
os.chdir('D:/')  # directory containing the PDF file
filename = input('输入PDF文件名')
# The context manager closes the PDF even when parsing raises.
with open(filename+'.pdf','rb') as pdfFile:
    pdfReader = PyPDF2.PdfFileReader(pdfFile)
    pages = pdfReader.getNumPages()
    i0 = 0
    for page in range(pages):
        pageSliced = pdfReader.getPage(page)
        pageObject = pageSliced.getObject()
        if '/Annots' in pageObject.keys():
            ann = pageObject['/Annots']
            old = ''  # last link printed on this page
            for a in ann:
                u = a.getObject()
                # Actions without a /URI entry are skipped; indexing '/URI'
                # unconditionally raised KeyError on them.
                if '/A' in u.keys() and '/URI' in u['/A']:
                    uri = u['/A']['/URI']
                    if re.search(re.compile('^https://doi.org'), uri):  # ignore other kinds of links
                        if uri != old:  # ignore a repeat of the previous link
                            print(i0 , uri)
                            i0 += 1
                            old = uri

View File

@@ -0,0 +1,21 @@
import os
import time
# Run two Python programs one after another and report how long each one,
# and the whole sequence, took (in minutes).
start = time.time()
print('程序1开始的时间', time.ctime())
start1 = time.time()
os.chdir('D:') # location of the first program
os.system('python a.py') # run a.py; os.system returns only after the command has finished
end1 = time.time()
print('程序1运行时间(min)=', (end1-start1)/60,'\n')
print('程序2开始的时间', time.ctime())
start2 = time.time()
os.chdir('E:') # location of the second program
os.system('python b.py') # run b.py
end2 = time.time()
print('程序2运行时间(min)=', (end2-start2)/60, '\n')
end = time.time()
print('总运行时间(min)=', (end-start)/60)

View File

@@ -0,0 +1,137 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/9129
"""
import os
import re
import time
import logging
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
def main():
    """Search every PDF below a folder for keywords and record the hits.

    For each keyword a '<keyword>.txt' report is written to the current
    directory. Files whose processing raises are listed in 'error.txt'.
    Keywords are used as regular-expression patterns.
    """
    # Parameters
    key_word_array = ['photonic', 'Berry phase']
    original_path = 'D:\\文献'
    # Collect the paths of all PDF files
    pdf_file_all = find_files_pdf(original_path)
    print('\n该文件夹下总共有', len(pdf_file_all), '个PDF文件。\n')
    with open('error.txt','w',encoding='utf-8'):
        pass  # create or truncate the error log
    for key_word in key_word_array:
        with open(str(key_word)+'.txt','w',encoding='utf-8') as f:
            f.write('该文件夹下总共有'+str(len(pdf_file_all))+'个PDF文件。\n')
    # Look for the keywords in every PDF
    begin = time.time()
    for i0, pdf_file in enumerate(pdf_file_all, start=1):
        print('查找第', i0, '个文件,', end='')
        begin0 = time.time()
        try:
            content = get_text_from_pdf(pdf_file)
            for key_word in key_word_array:
                if re.search(key_word, content):
                    print('发现文件!关键词', key_word, '对应的文件位置在:\n\n', pdf_file, '\n')
                    with open(str(key_word)+'.txt','a',encoding='utf-8') as f:
                        f.write('\n查找第'+str(i0)+'个文件时发现文件!位置在:\n'+pdf_file+'\n')
        except Exception:
            # Best effort: log the failing file and continue. `Exception`
            # rather than a bare except, so Ctrl-C still stops the scan.
            print('出现异常!位置在:\n\n', pdf_file, '\n')
            with open('error.txt','a',encoding='utf-8') as f:
                f.write('\n解析第'+str(i0)+'个文件时出现异常!位置在:\n'+pdf_file+'\n')
        end0 = time.time()
        print('用时', end0-begin0, '')
    print('\n全部搜索结束!')
    end = time.time()
    print('\n总共用时:', (end-begin)/60, '')
def find_files_pdf(path):  # find all PDF files
    """Return the paths of all files below *path* whose name ends with '.pdf'.

    The suffix is compared literally. The previous reversed-name regex
    '^fdp.' had an unescaped dot, so names such as 'notes_pdf' also matched.
    """
    return [file0 for file0 in find_files(path) if file0.endswith('.pdf')]
def find_files(path):  # find all files
    """Return the paths of all files below *path*, walking one depth level per pass.

    At most 10000 levels are visited.
    """
    collected = []
    pending_dirs = [path]
    depth = 0  # depth of the folders currently being listed
    while depth < 10000:
        files_here, pending_dirs = find_files_loop_module(pending_dirs)
        collected.extend(files_here)
        if not pending_dirs:
            break
        depth += 1
    return collected
def find_files_loop_module(path_all):  # one pass of the file search
    """List the direct children of every folder in *path_all*.

    Args:
        path_all: Iterable of folder paths to list.

    Returns:
        Tuple (files, folders) of joined paths. Entries that are neither a
        file nor a folder (e.g. dangling symlinks) are left out, so they are
        not passed to os.listdir on the next pass.
    """
    file_all_in_one_loop = []
    path_next_loop = []
    for path in path_all:
        for filename in os.listdir(path):
            filename = os.path.join(path, filename)
            if os.path.isfile(filename):  # regular file
                file_all_in_one_loop.append(filename)
            elif os.path.isdir(filename):  # folder to visit on the next pass
                path_next_loop.append(filename)
    return file_all_in_one_loop, path_next_loop
def get_text_from_pdf(file_path):  # extract the text of a PDF
    """Return the text of all LTTextBox elements of the PDF at *file_path*.

    Each text box is stripped and the pieces are concatenated without a
    separator.

    Raises:
        PDFTextExtractionNotAllowed: if the document is not extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    # The file stays open only while it is parsed; it was never closed before,
    # which leaks one handle per PDF when many files are scanned.
    with open(file_path, 'rb') as pdf_stream:
        # Parser bound to the file object
        praser = PDFParser(pdf_stream)
        # Document object, connected to the parser in both directions
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        # Initialise without a password
        doc.initialize()
        # Refuse documents that do not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # Resource manager, layout-aggregating device and page interpreter
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        text_parts = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # LTPage holding the objects parsed from this page
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    text_parts.append(x.get_text().strip())
    return ''.join(text_parts)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,63 @@
import os
os.chdir('D:/') # PDF文件存放的位置
import logging
logging.Logger.propagate = False
logging.getLogger().setLevel(logging.ERROR) # 只显示error级别的通知
def main():
    """Extract the text of 'a.pdf' and save it as 'a.txt' (UTF-8)."""
    extracted_text = get_text_from_pdf('a')
    with open('a.txt', 'w', encoding='utf-8') as out_file:
        out_file.write(extracted_text)
def get_text_from_pdf(filename):
    """Return the text of all LTTextBox elements of '<filename>.pdf'.

    Each text box is stripped and the pieces are concatenated without a
    separator.

    Raises:
        PDFTextExtractionNotAllowed: if the document is not extractable.
    """
    from pdfminer.pdfparser import PDFParser, PDFDocument
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox
    from pdfminer.pdfinterp import PDFTextExtractionNotAllowed
    path = filename+".pdf"
    # The file stays open only while it is parsed; it was never closed before.
    with open(path, 'rb') as pdf_stream:
        # Parser bound to the file object
        praser = PDFParser(pdf_stream)
        # Document object, connected to the parser in both directions
        doc = PDFDocument()
        praser.set_document(doc)
        doc.set_parser(praser)
        # Initialise without a password
        doc.initialize()
        # Refuse documents that do not allow text extraction
        if not doc.is_extractable:
            raise PDFTextExtractionNotAllowed
        # Resource manager, layout-aggregating device and page interpreter
        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        text_parts = []
        for page in doc.get_pages():
            interpreter.process_page(page)
            # LTPage holding the objects parsed from this page
            layout = device.get_result()
            for x in layout:
                if isinstance(x, LTTextBox):
                    text_parts.append(x.get_text().strip())
    return ''.join(text_parts)
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,29 @@
import numpy as np
# Settings
cpus = 7 # number of CPUs used, equal to the number of submitted jobs
parameter_array_all = np.arange(0, 10, 0.1) # all parameters to compute
# Job index, rewritten by the submit .sh script; runs from 0 to cpus-1.
# The submit script replaces the exact text "job_index = -1" with sed,
# so keep the next line byte-for-byte unchanged.
job_index = -1
# Pre-processing: choose the slice of parameters handled by this job
len_of_parameter_all = len(parameter_array_all) # total number of parameters
if len_of_parameter_all%cpus == 0:
len_parameter = int(len_of_parameter_all/cpus) # parameters per CPU/job
parameter_array = parameter_array_all[job_index*len_parameter:(job_index+1)*len_parameter]
else:
# Not evenly divisible: the first cpus-1 jobs take equal slices and the
# last job takes whatever remains.
len_parameter = int(len_of_parameter_all/(cpus-1)) # parameters per CPU/job
if job_index != cpus-1:
parameter_array = parameter_array_all[job_index*len_parameter:(job_index+1)*len_parameter]
else:
parameter_array = parameter_array_all[job_index*len_parameter:len_of_parameter_all]
# Task: write "parameter parameter**2" lines to a<job_index>.txt
with open('a'+str(job_index)+'.txt', 'w') as f:
for parameter in parameter_array:
result = parameter**2
f.write(str(parameter)+' '+str(result)+'\n')

View File

@@ -0,0 +1,5 @@
#!/bin/sh
# Job script template: limits OpenMP to one thread and runs the Python task.
# The submit script copies this file per job and rewrites the python line
# with sed, so keep that line's text unchanged.
#PBS -N task
#PBS -l nodes=1:ppn=1
export OMP_NUM_THREADS=1
python a.py

View File

@@ -0,0 +1,6 @@
# Merge the per-job outputs a0.txt ... a6.txt into combine.txt, in job order.
# The context manager closes combine.txt even if one of the inputs is missing.
with open('combine.txt', 'w') as f:
    for job_index in range(7):
        with open('a'+str(job_index)+'.txt', 'r') as f0:
            text = f0.read()
        f.write(text)

View File

@@ -0,0 +1,16 @@
0.0 0.0
0.1 0.010000000000000002
0.2 0.04000000000000001
0.30000000000000004 0.09000000000000002
0.4 0.16000000000000003
0.5 0.25
0.6000000000000001 0.3600000000000001
0.7000000000000001 0.4900000000000001
0.8 0.6400000000000001
0.9 0.81
1.0 1.0
1.1 1.2100000000000002
1.2000000000000002 1.4400000000000004
1.3 1.6900000000000002
1.4000000000000001 1.9600000000000004
1.5 2.25

View File

@@ -0,0 +1,16 @@
1.6 2.5600000000000005
1.7000000000000002 2.8900000000000006
1.8 3.24
1.9000000000000001 3.6100000000000003
2.0 4.0
2.1 4.41
2.2 4.840000000000001
2.3000000000000003 5.290000000000001
2.4000000000000004 5.760000000000002
2.5 6.25
2.6 6.760000000000001
2.7 7.290000000000001
2.8000000000000003 7.840000000000002
2.9000000000000004 8.410000000000002
3.0 9.0
3.1 9.610000000000001

View File

@@ -0,0 +1,16 @@
3.2 10.240000000000002
3.3000000000000003 10.890000000000002
3.4000000000000004 11.560000000000002
3.5 12.25
3.6 12.96
3.7 13.690000000000001
3.8000000000000003 14.440000000000001
3.9000000000000004 15.210000000000003
4.0 16.0
4.1000000000000005 16.810000000000006
4.2 17.64
4.3 18.49
4.4 19.360000000000003
4.5 20.25
4.6000000000000005 21.160000000000004
4.7 22.090000000000003

View File

@@ -0,0 +1,16 @@
4.800000000000001 23.040000000000006
4.9 24.010000000000005
5.0 25.0
5.1000000000000005 26.010000000000005
5.2 27.040000000000003
5.300000000000001 28.090000000000007
5.4 29.160000000000004
5.5 30.25
5.6000000000000005 31.360000000000007
5.7 32.49
5.800000000000001 33.64000000000001
5.9 34.81
6.0 36.0
6.1000000000000005 37.21000000000001
6.2 38.440000000000005
6.300000000000001 39.69000000000001

View File

@@ -0,0 +1,16 @@
6.4 40.96000000000001
6.5 42.25
6.6000000000000005 43.56000000000001
6.7 44.89
6.800000000000001 46.24000000000001
6.9 47.61000000000001
7.0 49.0
7.1000000000000005 50.41000000000001
7.2 51.84
7.300000000000001 53.29000000000001
7.4 54.760000000000005
7.5 56.25
7.6000000000000005 57.760000000000005
7.7 59.290000000000006
7.800000000000001 60.84000000000001
7.9 62.410000000000004

View File

@@ -0,0 +1,16 @@
8.0 64.0
8.1 65.61
8.200000000000001 67.24000000000002
8.3 68.89000000000001
8.4 70.56
8.5 72.25
8.6 73.96
8.700000000000001 75.69000000000001
8.8 77.44000000000001
8.9 79.21000000000001
9.0 81.0
9.1 82.80999999999999
9.200000000000001 84.64000000000001
9.3 86.49000000000001
9.4 88.36000000000001
9.5 90.25

View File

@@ -0,0 +1,4 @@
9.600000000000001 92.16000000000003
9.700000000000001 94.09000000000002
9.8 96.04000000000002
9.9 98.01

View File

@@ -0,0 +1,100 @@
0.0 0.0
0.1 0.010000000000000002
0.2 0.04000000000000001
0.30000000000000004 0.09000000000000002
0.4 0.16000000000000003
0.5 0.25
0.6000000000000001 0.3600000000000001
0.7000000000000001 0.4900000000000001
0.8 0.6400000000000001
0.9 0.81
1.0 1.0
1.1 1.2100000000000002
1.2000000000000002 1.4400000000000004
1.3 1.6900000000000002
1.4000000000000001 1.9600000000000004
1.5 2.25
1.6 2.5600000000000005
1.7000000000000002 2.8900000000000006
1.8 3.24
1.9000000000000001 3.6100000000000003
2.0 4.0
2.1 4.41
2.2 4.840000000000001
2.3000000000000003 5.290000000000001
2.4000000000000004 5.760000000000002
2.5 6.25
2.6 6.760000000000001
2.7 7.290000000000001
2.8000000000000003 7.840000000000002
2.9000000000000004 8.410000000000002
3.0 9.0
3.1 9.610000000000001
3.2 10.240000000000002
3.3000000000000003 10.890000000000002
3.4000000000000004 11.560000000000002
3.5 12.25
3.6 12.96
3.7 13.690000000000001
3.8000000000000003 14.440000000000001
3.9000000000000004 15.210000000000003
4.0 16.0
4.1000000000000005 16.810000000000006
4.2 17.64
4.3 18.49
4.4 19.360000000000003
4.5 20.25
4.6000000000000005 21.160000000000004
4.7 22.090000000000003
4.800000000000001 23.040000000000006
4.9 24.010000000000005
5.0 25.0
5.1000000000000005 26.010000000000005
5.2 27.040000000000003
5.300000000000001 28.090000000000007
5.4 29.160000000000004
5.5 30.25
5.6000000000000005 31.360000000000007
5.7 32.49
5.800000000000001 33.64000000000001
5.9 34.81
6.0 36.0
6.1000000000000005 37.21000000000001
6.2 38.440000000000005
6.300000000000001 39.69000000000001
6.4 40.96000000000001
6.5 42.25
6.6000000000000005 43.56000000000001
6.7 44.89
6.800000000000001 46.24000000000001
6.9 47.61000000000001
7.0 49.0
7.1000000000000005 50.41000000000001
7.2 51.84
7.300000000000001 53.29000000000001
7.4 54.760000000000005
7.5 56.25
7.6000000000000005 57.760000000000005
7.7 59.290000000000006
7.800000000000001 60.84000000000001
7.9 62.410000000000004
8.0 64.0
8.1 65.61
8.200000000000001 67.24000000000002
8.3 68.89000000000001
8.4 70.56
8.5 72.25
8.6 73.96
8.700000000000001 75.69000000000001
8.8 77.44000000000001
8.9 79.21000000000001
9.0 81.0
9.1 82.80999999999999
9.200000000000001 84.64000000000001
9.3 86.49000000000001
9.4 88.36000000000001
9.5 90.25
9.600000000000001 92.16000000000003
9.700000000000001 94.09000000000002
9.8 96.04000000000002
9.9 98.01

View File

@@ -0,0 +1,14 @@
#!/bin/sh
# Create and submit one job per job_index (0..6).
for job_index in 0 1 2 3 4 5 6
do
# Per-job copy of the Python script with the job_index placeholder filled in
cp a.py a${job_index}.py
sed -i "s/job_index = -1/job_index = ${job_index}/" a${job_index}.py
# Per-job copy of the job script, pointed at the per-job Python file
cp a.sh a${job_index}.sh
sed -i "s/python a.py/python a${job_index}.py/" a${job_index}.sh
# Submit the job
qsub a${job_index}.sh
done

View File

@@ -0,0 +1,24 @@
import os
def find_files_and_directory(path):
    """List the direct children of every folder in *path*.

    Args:
        path: Iterable of folder paths to list.

    Returns:
        Tuple (files, folders) of joined paths. Entries that are neither a
        file nor a folder (e.g. dangling symlinks) are left out, so callers
        can safely list every returned folder.
    """
    file = []
    directory = []
    # A separate loop name; the original reused `path` and shadowed the parameter.
    for folder in path:
        for filename in os.listdir(folder):
            filename = os.path.join(folder, filename)
            if os.path.isfile(filename):  # regular file
                file.append(filename)
            elif os.path.isdir(filename):  # folder
                directory.append(filename)
    return file, directory
# Count the sub-folders of the Zotero storage folder whose number of files is not 3.
file, directory = find_files_and_directory(['E:/zotero/storage'])
i0 = 0
# The loop keeps iterating the list obtained above even though the names
# `file` and `directory` are rebound inside it.
for path in directory:
file, directory = find_files_and_directory([path])
if len(file)!=3:
i0 += 1
print(path, '文件夹中有', len(file), '个文件')
print('文件夹中文件个数不等于3的总个数:', i0)

View File

@@ -0,0 +1,89 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
"""
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
from collections import Counter
import datetime
"""
支持APS系列的首页和recent页面。
例如PRB期刊https://journals.aps.org/prb 和 https://journals.aps.org/prb/recent
请勿大量、循环运行爬虫防止IP地址被官网拉入黑名单。
"""
# 访问链接
visit_link = "https://journals.aps.org/prb"
# visit_link = "https://journals.aps.org/prb/recent"
# visit_link = "https://journals.aps.org/prl"
# visit_link = "https://journals.aps.org/prl/recent"
# 获取文章链接
with urlopen(visit_link) as response:  # the response is closed once it has been read
    html = response.read().decode('utf-8')
soup = BeautifulSoup(html, features='lxml')
all_a_tag = soup.find_all('a', href=True)
match_href = []
for a_tag in all_a_tag:
    href = a_tag['href']
    # Keep only links to article abstracts
    if not (re.search('https://journals.aps.org/.*/abstract', href) or re.search('.*/abstract/', href)):
        continue
    if '?' in href:  # skip links carrying a query string
        continue
    # Make the link absolute BEFORE the duplicate check. The list stores
    # absolute URLs, so comparing the relative form never found a duplicate.
    if re.search('https://journals.aps.org', href) is None:
        href = 'https://journals.aps.org'+ href
    if href not in match_href:
        match_href.append(href)
print('\n当前页面总共有', len(match_href), '篇文章。\n')
# 获取文章中的摘要内容
i0 = 0
year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day
f = open(str(year)+'.'+str(month).rjust(2,'0')+'.'+str(day).rjust(2,'0')+'_word_list.txt', 'w', encoding='UTF-8')
ignore_inner = ['alt="Figure', 'the', '<math', 'to', 'of', 'in', 'under', '<i', 'and', 'by', 'The', 'at', 'with', 'up', 'be', 'above', 'below', 'are', 'is', 'for', 'that', 'as', 'we', '<a', 'abstract', 'abstract"','<span', 'which', 'We', 'such', 'has', 'two', 'these', 'it', 'all', 'results', 'result', 'each', 'have', 'between', 'on', 'an', 'can', 'also', 'from', 'Our', 'our', 'using', 'where', 'These', 'out', 'both', 'due', 'less', 'along', 'but', 'In', 'show', 'into', 'study', 'find', 'provide', 'change', '(<math','not', 'open', 'this', 'show', 'into', 'study', 'find', 'provide', 'change', 'present', 'Using', 'large', 'This', 'However', 'appear', 'studied', 'obtain', 'been', 'Both', 'they', 'effects', 'effect', 'compute', 'more', 'does', 'shown', 'Based', 'reveal', 'highly', 'number', 'However,', 'was', 'near', 'full', 'based', 'several', 'suggest', 'agreement', 'predicted', 'values', 'work', 'emphasize', 'without', 'or', 'work,', 'studies', 'future', 'identify', 'present.', 'predict', 'presence', 'their', 'were', 'From', 'its', 'By', 'how', 'ground', 'observed', 'recent', 'For', 'other', 'Here', 'test', 'further', 'Its', 'similar', 'however,', 'range', 'within', 'value', 'possible', 'may', 'than', 'low', 'us', 'obtained', 'around', 'consider', 'about', 'very', 'will', 'when', 'played', 'consist', 'consists', 'Here,', 'observe', 'gives', 'It', 'over', 'cannot', 'As', 'whose', 'new', 'some', 'only', 'from', 'yields', 'shows', 'data', 'direct', 'related', 'different', 'evidence', 'role', 'function', 'origin', 'specific', 'set', 'confirm', 'give', 'Moreover', 'develop', 'including', 'could', 'used', 'means', 'allows', 'make', 'e.g.,', 'provides', 'system', 'systems', 'field', 'fields', 'model', 'model,', 'state', 'states', 'states.', 'state.', 'band', 'bands', 'method', 'methods', 'nature', 'rate', 'zero', 'single', 'theory', 'first', 'one', 'complex', 'approach', 'schemes', 'terms', 'even', 'case', 'analysis', 'weight', 'volume', 'evolution', 'well', 'external', 
'measured', 'introducing', 'dependence', 'properties', 'demonstrate', 'remains', 'through', 'measurements', 'samples', 'findings', 'respect', 'investigate', 'behavior', 'importance', 'considered', 'experimental', 'increase', 'propose', 'follows', 'increase', 'emerged', 'interesting', 'behaviors', 'influenced', 'paramount', 'indicate', 'Rev.', 'concepts', 'induced', 'zone', 'regions', 'exact', 'contribution', 'behavior', 'formation', 'measurements.', 'utilizing', 'constant', 'regime', 'features', 'strength', 'compare', 'determined', 'combination', 'compare', 'determined', 'At', 'inside', 'ambient', 'then', 'important', 'report', 'Moreover,', 'Despite', 'found', 'because', 'process', 'and,', 'significantly', 'realized', 'much', 'natural', 'since', 'grows', 'any', 'compared', 'while', 'forms.', 'appears', 'indicating', 'coefficient', 'suggested', 'time', 'exhibits', 'calculations.', 'developed', 'array', 'discuss', 'field', 'becomes', 'allowing', 'indicates', 'via', 'introduce', 'considering', 'times.', 'constructed', 'explain', 'form', 'owing', 'parameters.', 'parameter', 'operation', 'probe', 'experiments', 'interest', 'strategies', 'seen', 'emerge', 'generic', 'geometry', 'numbers', 'observation', 'avenue', 'theretically', 'three', 'excellent', 'amount', 'notable', 'example', 'being', 'promising', 'latter', 'little', 'imposed', 'put', 'resource', 'together', 'produce', 'successfully','there', 'enhanced', 'this', 'great', 'dirven', 'increasing','should', 'otherwise', 'Further', 'field,', 'known', 'changes', 'still', 'beyond', 'various', 'center', 'previously', 'way', 'peculiar', 'detailed', 'understanding', 'good', 'years', 'where', 'Me', 'origins', 'years.', 'attributed', 'known,', 'them', 'reported', 'no', 'systems', 'agree', 'examined', 'rise', 'calculate', 'those', 'particular', 'relation', 'defined', 'either', 'again', 'current', 'exhibit', 'calculated', 'here', 'made', 'Further', 'consisting', 'constitutes', 'originated', 'if', 'exceed', 'access']
# For every article: fetch the page, cut out the span from the abstract anchor
# to "<li>Received", split it on single spaces and append each accepted word
# to the word-list file f (opened before this loop).
# NOTE(review): the [0] below raises IndexError when a page has no such span.
for href in match_href:
i0 += 1
print('正在读取第', i0, '')
html = urlopen(href).read().decode('utf-8')
abstract = re.findall('<a name="abstract">.*<li>Received', html, re.S)[0]
word_list = abstract.split(' ')
word_list_for_one_href = []
for word in word_list:
# Accept words of 2..34 characters that are not in ignore_inner and do not look like HTML attribute or tag fragments
if 1<len(word)<35 and word not in ignore_inner and re.search('class=', word)==None and re.search('data-', word)==None and re.search('><', word)==None:
if word not in word_list_for_one_href: # count each word only once per article
word_list_for_one_href.append(word)
f.write(str(word)+' ')
f.close()
"""
运行一次后,以上的代码可以注释,不需要多次访问网址。
以下代码调用的是上面代码生成的txt文件可个性选择忽略的词汇多次运行调试。
"""
# Extra words to ignore (add or remove entries as needed)
ignore = []
year = datetime.datetime.now().year
month = datetime.datetime.now().month
day = datetime.datetime.now().day
# Read back today's word list written by the first half of the script
with open(f'{year}.{month:02d}.{day:02d}_word_list.txt', 'r', encoding='UTF-8') as f:
    word_list = f.read().split(' ')
# Keep words of 2..34 characters that are not explicitly ignored
effective_words = [word for word in word_list if 1 < len(word) < 35 and word not in ignore]
# Report the n most frequent words
num = 50
most_common_words = Counter(effective_words).most_common(num)
print('\n出现频率最高的前', num, '个词汇:')
for word in most_common_words:
    print(word)
print()

View File

@@ -0,0 +1,59 @@
"""
This code is supported by the website: https://www.guanjihuan.com
The newest version of this code is on the web page: https://www.guanjihuan.com/archives/13623
"""
from bs4 import BeautifulSoup
from urllib.request import urlopen
import re
from collections import Counter
import datetime
import random
import time
# Tokens excluded from the word statistics: common English words, generic
# scientific vocabulary and leftover HTML fragments. The list is only used for
# membership tests ("word not in ignore_inner"), so repeated entries are harmless.
ignore_inner = ['alt="Figure', 'the', '<math', 'to', 'of', 'in', 'under', '<i', 'and', 'by', 'The', 'at', 'with', 'up', 'be', 'above', 'below', 'are', 'is', 'for', 'that', 'as', 'we', '<a', 'abstract', 'abstract"','<span', 'which', 'We', 'such', 'has', 'two', 'these', 'it', 'all', 'results', 'result', 'each', 'have', 'between', 'on', 'an', 'can', 'also', 'from', 'Our', 'our', 'using', 'where', 'These', 'out', 'both', 'due', 'less', 'along', 'but', 'In', 'show', 'into', 'study', 'find', 'provide', 'change', '(<math','not', 'open', 'this', 'show', 'into', 'study', 'find', 'provide', 'change', 'present', 'Using', 'large', 'This', 'However', 'appear', 'studied', 'obtain', 'been', 'Both', 'they', 'effects', 'effect', 'compute', 'more', 'does', 'shown', 'Based', 'reveal', 'highly', 'number', 'However,', 'was', 'near', 'full', 'based', 'several', 'suggest', 'agreement', 'predicted', 'values', 'work', 'emphasize', 'without', 'or', 'work,', 'studies', 'future', 'identify', 'present.', 'predict', 'presence', 'their', 'were', 'From', 'its', 'By', 'how', 'ground', 'observed', 'recent', 'For', 'other', 'Here', 'test', 'further', 'Its', 'similar', 'however,', 'range', 'within', 'value', 'possible', 'may', 'than', 'low', 'us', 'obtained', 'around', 'consider', 'about', 'very', 'will', 'when', 'played', 'consist', 'consists', 'Here,', 'observe', 'gives', 'It', 'over', 'cannot', 'As', 'whose', 'new', 'some', 'only', 'from', 'yields', 'shows', 'data', 'direct', 'related', 'different', 'evidence', 'role', 'function', 'origin', 'specific', 'set', 'confirm', 'give', 'Moreover', 'develop', 'including', 'could', 'used', 'means', 'allows', 'make', 'e.g.,', 'provides', 'system', 'systems', 'field', 'fields', 'model', 'model,', 'state', 'states', 'states.', 'state.', 'band', 'bands', 'method', 'methods', 'nature', 'rate', 'zero', 'single', 'theory', 'first', 'one', 'complex', 'approach', 'schemes', 'terms', 'even', 'case', 'analysis', 'weight', 'volume', 'evolution', 'well', 'external', 
'measured', 'introducing', 'dependence', 'properties', 'demonstrate', 'remains', 'through', 'measurements', 'samples', 'findings', 'respect', 'investigate', 'behavior', 'importance', 'considered', 'experimental', 'increase', 'propose', 'follows', 'increase', 'emerged', 'interesting', 'behaviors', 'influenced', 'paramount', 'indicate', 'Rev.', 'concepts', 'induced', 'zone', 'regions', 'exact', 'contribution', 'behavior', 'formation', 'measurements.', 'utilizing', 'constant', 'regime', 'features', 'strength', 'compare', 'determined', 'combination', 'compare', 'determined', 'At', 'inside', 'ambient', 'then', 'important', 'report', 'Moreover,', 'Despite', 'found', 'because', 'process', 'and,', 'significantly', 'realized', 'much', 'natural', 'since', 'grows', 'any', 'compared', 'while', 'forms.', 'appears', 'indicating', 'coefficient', 'suggested', 'time', 'exhibits', 'calculations.', 'developed', 'array', 'discuss', 'field', 'becomes', 'allowing', 'indicates', 'via', 'introduce', 'considering', 'times.', 'constructed', 'explain', 'form', 'owing', 'parameters.', 'parameter', 'operation', 'probe', 'experiments', 'interest', 'strategies', 'seen', 'emerge', 'generic', 'geometry', 'numbers', 'observation', 'avenue', 'theretically', 'three', 'excellent', 'amount', 'notable', 'example', 'being', 'promising', 'latter', 'little', 'imposed', 'put', 'resource', 'together', 'produce', 'successfully','there', 'enhanced', 'this', 'great', 'dirven', 'increasing','should', 'otherwise', 'Further', 'field,', 'known', 'changes', 'still', 'beyond', 'various', 'center', 'previously', 'way', 'peculiar', 'detailed', 'understanding', 'good', 'years', 'where', 'Me', 'origins', 'years.', 'attributed', 'known,', 'them', 'reported', 'no', 'systems', 'agree', 'examined', 'rise', 'calculate', 'those', 'particular', 'relation', 'defined', 'either', 'again', 'current', 'exhibit', 'calculated', 'here', 'made', 'Further', 'consisting', 'constitutes', 'originated', 'if', 'exceed', 'access']
# For each journal: collect the abstract links from its "recent" page, count in
# how many abstracts every word occurs, and put a dated one-line report on top
# of the journal's history file.
num = 50  # number of most frequent words written per report
# Read the clock once so that year, month and day belong to the same instant.
today = datetime.datetime.now()
year = today.year
month = today.month
day = today.day
date_stamp = str(year)+'.'+str(month).rjust(2, '0')+'.'+str(day).rjust(2, '0')
ignored_words = set(ignore_inner)  # built once: set lookups instead of scanning the list per word
# Tokens containing any of these fragments are HTML residue, not words.
markup_fragments = ('class=', 'data-', '<', '>', 'href')
journals = (
    ("https://journals.aps.org/prb/recent", 'prb_recent_most_common_words.txt'),
    ("https://journals.aps.org/prl/recent", 'prl_recent_most_common_words.txt'),
)
for visit_link, output_file in journals:
    # Earlier reports are kept below the new one. A missing file simply means
    # this is the first run and there is no history yet.
    try:
        with open(output_file, 'r', encoding='UTF-8') as f0:
            content_before = f0.read()
    except FileNotFoundError:
        content_before = ''
    html = urlopen(visit_link).read().decode('utf-8')
    soup = BeautifulSoup(html, features='lxml')
    all_a_tag = soup.find_all('a', href=True)
    match_href = []
    for a_tag in all_a_tag:
        href = a_tag['href']
        if not (re.search('https://journals.aps.org/.*/abstract', href) or re.search('.*/abstract/', href)):
            continue
        if '?' in href:  # links with a query string are skipped
            continue
        # Make relative links absolute BEFORE the duplicate check; otherwise a
        # relative href never equals the absolute one already stored.
        if re.search('https://journals.aps.org', href) is None:
            href = 'https://journals.aps.org' + href
        if href not in match_href:
            match_href.append(href)
    all_word_list = []
    number_of_papers = 0  # papers whose abstract was actually read
    for href in match_href:
        time.sleep(random.uniform(0, 2))  # pause about a second between requests
        html = urlopen(href).read().decode('utf-8')
        found = re.findall('<a name="abstract">.*<li>Received', html, re.S)
        if not found:
            # The page lacks the expected abstract markup: skip it rather than
            # aborting the whole run with an IndexError.
            continue
        number_of_papers += 1
        seen_in_this_paper = set()
        for word in found[0].split(' '):
            if not 1 < len(word) < 35 or word in ignored_words:
                continue
            if any(fragment in word for fragment in markup_fragments):
                continue
            if word not in seen_in_this_paper:  # count a word only once per paper
                seen_in_this_paper.add(word)
                all_word_list.append(word)
    most_common_words = Counter(all_word_list).most_common(num)
    # The file is opened for writing only after every download succeeded, so a
    # network failure can no longer truncate the accumulated history.
    with open(output_file, 'w', encoding='UTF-8') as f:
        f.write(date_stamp+'|number_of_papers='+str(number_of_papers))
        for word in most_common_words:
            f.write('|'+str(word))  # each entry is a (word, count) tuple
        f.write('\n\n')
        f.write(content_before)